The aim of this part is to explore and study the data before modeling.
This part covers five tasks:
1. Visualize the distributions of the variables in the data set.
2. Condition the continuous variables on the categorical variables.
3. Visualize the relationships between the continuous inputs.
4. Visualize the relationships between the continuous outputs (the response and the log-transformed response) and the continuous inputs.
5. Visualize the behavior of the binary outcome with respect to the continuous inputs.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(visdat)
library(cowplot)
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(purrr)
# Load the training data; the first row of the CSV supplies the column names.
df_all <- readr::read_csv(
  file = "final_project_train.csv",
  col_names = TRUE
)
## Rows: 677 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): region, customer, outcome
## dbl (35): rowid, xb_01, xb_02, xb_03, xn_01, xn_02, xn_03, xa_01, xa_02, xa_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact, one-line-per-column overview of the full data set.
glimpse(df_all)
## Rows: 677
## Columns: 38
## $ rowid <dbl> 1, 3, 4, 5, 8, 9, 11, 14, 15, 16, 17, 18, 19, 22, 24, 25, 27,…
## $ region <chr> "XX", "XX", "XX", "XX", "XX", "XX", "XX", "XX", "XX", "XX", "…
## $ customer <chr> "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "…
## $ xb_01 <dbl> 4.000000, 1.000000, 2.000000, 2.520000, 2.548387, 3.071429, 3…
## $ xb_02 <dbl> 4, 1, 2, 11, 6, 6, 10, 12, 9, 10, 8, 10, 10, 8, 6, 10, 13, 10…
## $ xb_03 <dbl> 4, 1, 2, -6, -1, 1, -4, -4, -2, -4, -2, -2, -2, -4, 1, -4, -3…
## $ xn_01 <dbl> 3.0000000, 2.0000000, 2.0000000, 1.5333333, 0.8387097, 1.8571…
## $ xn_02 <dbl> 3, 2, 4, 9, 3, 8, 6, 10, 10, 4, 6, 8, 9, 5, 7, 12, 12, 6, 6, …
## $ xn_03 <dbl> 3, 2, 0, -3, -4, -2, -5, -6, -3, -5, -3, -6, -4, -3, 0, -5, -…
## $ xa_01 <dbl> 12.000000, 3.000000, 9.000000, 7.080000, 6.451613, 6.857143, …
## $ xa_02 <dbl> 12, 3, 9, 29, 17, 18, 24, 27, 20, 19, 15, 24, 24, 15, 14, 26,…
## $ xa_03 <dbl> 12, 3, 9, -7, -2, 2, -9, -5, -3, -3, -1, 1, -2, -3, 3, -4, -5…
## $ xb_04 <dbl> 1.3333333, 1.0000000, 1.0000000, 0.8950476, 1.2247312, 1.1857…
## $ xb_05 <dbl> 1.3333333, 1.0000000, 1.0000000, -2.0000000, -0.5000000, 0.00…
## $ xb_06 <dbl> 1.333333, 1.000000, 1.000000, 4.000000, 4.000000, 3.000000, 6…
## $ xb_07 <dbl> 4.000000, 1.000000, 2.000000, 1.933333, 1.967742, 1.714286, 1…
## $ xb_08 <dbl> -1.00000000, 1.00000000, 0.00000000, -0.08000000, 0.35483871,…
## $ xn_04 <dbl> 1.0000000, 2.0000000, 1.0000000, 0.5268889, 0.4688172, 0.5607…
## $ xn_05 <dbl> 1.0000000, 2.0000000, 0.0000000, -1.0000000, -1.3333333, -1.0…
## $ xn_06 <dbl> 1.0, 2.0, 2.0, 2.5, 3.0, 2.0, 4.0, 4.0, 3.0, 2.0, 2.0, 2.5, 2…
## $ xn_07 <dbl> 3.000000, 2.000000, 2.500000, 1.493333, 1.225806, 1.642857, 1…
## $ xn_08 <dbl> -1.0000000, 2.0000000, -1.0000000, -0.4400000, -0.4516129, -0…
## $ xa_04 <dbl> 6.000000, 3.000000, 6.750000, 2.425333, 3.023656, 2.685714, 2…
## $ xa_05 <dbl> 6.0000000, 3.0000000, 4.5000000, -3.5000000, -0.6666667, 0.40…
## $ xa_06 <dbl> 6.000000, 3.000000, 9.000000, 9.000000, 13.000000, 6.000000, …
## $ xa_07 <dbl> 9.000000, 3.000000, 7.500000, 4.466667, 4.612903, 4.071429, 4…
## $ xa_08 <dbl> 3.0000000, 3.0000000, 6.0000000, 0.7066667, 1.3225806, 1.3571…
## $ xw_01 <dbl> 23.00000, 17.00000, 52.50000, 64.52564, 54.75758, 58.33333, 6…
## $ xw_02 <dbl> 23, 17, 48, 0, 12, 15, 0, 0, 0, 7, 14, 0, 0, 0, 8, 8, 0, 4, 2…
## $ xw_03 <dbl> 23, 17, 57, 106, 105, 101, 107, 109, 109, 104, 109, 99, 103, …
## $ xs_01 <dbl> 0.262073307, 0.330804757, 0.239795763, 0.142106837, 0.2442957…
## $ xs_02 <dbl> 0.26207331, 0.33080476, 0.19049123, -0.73321509, -0.12204299,…
## $ xs_03 <dbl> 0.2620733, 0.3308048, 0.2891003, 0.5500723, 1.3134719, 0.6540…
## $ xs_04 <dbl> 0.5375576, 0.4286607, 0.3676937, 0.2865445, 0.2375470, 0.2594…
## $ xs_05 <dbl> 0.5375575604, 0.4286607050, 0.2485001680, 0.0000000000, 0.043…
## $ xs_06 <dbl> 0.5375576, 0.4286607, 0.4868872, 0.6357541, 0.4327004, 0.8672…
## $ response <dbl> 2.617991, 1.184632, 2.216626, 2.726715, 1.483323, 2.039279, 1…
## $ outcome <chr> "non_event", "non_event", "event", "non_event", "non_event", …
# Plot the missing-value pattern of every column.
df_all %>%
  visdat::vis_miss()
There is no missing data in the data set.
# Plot the data type of every column.
df_all %>%
  visdat::vis_dat()
The variables region, customer, and outcome are of type character, while all other variables are numeric. Therefore, there are three categorical variables.
# Preview only the numeric modelling columns: drop the three character
# columns and the row identifier.
df_all %>%
  select(-c(region, customer, outcome, rowid)) %>%
  glimpse()
## Rows: 677
## Columns: 34
## $ xb_01 <dbl> 4.000000, 1.000000, 2.000000, 2.520000, 2.548387, 3.071429, 3…
## $ xb_02 <dbl> 4, 1, 2, 11, 6, 6, 10, 12, 9, 10, 8, 10, 10, 8, 6, 10, 13, 10…
## $ xb_03 <dbl> 4, 1, 2, -6, -1, 1, -4, -4, -2, -4, -2, -2, -2, -4, 1, -4, -3…
## $ xn_01 <dbl> 3.0000000, 2.0000000, 2.0000000, 1.5333333, 0.8387097, 1.8571…
## $ xn_02 <dbl> 3, 2, 4, 9, 3, 8, 6, 10, 10, 4, 6, 8, 9, 5, 7, 12, 12, 6, 6, …
## $ xn_03 <dbl> 3, 2, 0, -3, -4, -2, -5, -6, -3, -5, -3, -6, -4, -3, 0, -5, -…
## $ xa_01 <dbl> 12.000000, 3.000000, 9.000000, 7.080000, 6.451613, 6.857143, …
## $ xa_02 <dbl> 12, 3, 9, 29, 17, 18, 24, 27, 20, 19, 15, 24, 24, 15, 14, 26,…
## $ xa_03 <dbl> 12, 3, 9, -7, -2, 2, -9, -5, -3, -3, -1, 1, -2, -3, 3, -4, -5…
## $ xb_04 <dbl> 1.3333333, 1.0000000, 1.0000000, 0.8950476, 1.2247312, 1.1857…
## $ xb_05 <dbl> 1.3333333, 1.0000000, 1.0000000, -2.0000000, -0.5000000, 0.00…
## $ xb_06 <dbl> 1.333333, 1.000000, 1.000000, 4.000000, 4.000000, 3.000000, 6…
## $ xb_07 <dbl> 4.000000, 1.000000, 2.000000, 1.933333, 1.967742, 1.714286, 1…
## $ xb_08 <dbl> -1.00000000, 1.00000000, 0.00000000, -0.08000000, 0.35483871,…
## $ xn_04 <dbl> 1.0000000, 2.0000000, 1.0000000, 0.5268889, 0.4688172, 0.5607…
## $ xn_05 <dbl> 1.0000000, 2.0000000, 0.0000000, -1.0000000, -1.3333333, -1.0…
## $ xn_06 <dbl> 1.0, 2.0, 2.0, 2.5, 3.0, 2.0, 4.0, 4.0, 3.0, 2.0, 2.0, 2.5, 2…
## $ xn_07 <dbl> 3.000000, 2.000000, 2.500000, 1.493333, 1.225806, 1.642857, 1…
## $ xn_08 <dbl> -1.0000000, 2.0000000, -1.0000000, -0.4400000, -0.4516129, -0…
## $ xa_04 <dbl> 6.000000, 3.000000, 6.750000, 2.425333, 3.023656, 2.685714, 2…
## $ xa_05 <dbl> 6.0000000, 3.0000000, 4.5000000, -3.5000000, -0.6666667, 0.40…
## $ xa_06 <dbl> 6.000000, 3.000000, 9.000000, 9.000000, 13.000000, 6.000000, …
## $ xa_07 <dbl> 9.000000, 3.000000, 7.500000, 4.466667, 4.612903, 4.071429, 4…
## $ xa_08 <dbl> 3.0000000, 3.0000000, 6.0000000, 0.7066667, 1.3225806, 1.3571…
## $ xw_01 <dbl> 23.00000, 17.00000, 52.50000, 64.52564, 54.75758, 58.33333, 6…
## $ xw_02 <dbl> 23, 17, 48, 0, 12, 15, 0, 0, 0, 7, 14, 0, 0, 0, 8, 8, 0, 4, 2…
## $ xw_03 <dbl> 23, 17, 57, 106, 105, 101, 107, 109, 109, 104, 109, 99, 103, …
## $ xs_01 <dbl> 0.262073307, 0.330804757, 0.239795763, 0.142106837, 0.2442957…
## $ xs_02 <dbl> 0.26207331, 0.33080476, 0.19049123, -0.73321509, -0.12204299,…
## $ xs_03 <dbl> 0.2620733, 0.3308048, 0.2891003, 0.5500723, 1.3134719, 0.6540…
## $ xs_04 <dbl> 0.5375576, 0.4286607, 0.3676937, 0.2865445, 0.2375470, 0.2594…
## $ xs_05 <dbl> 0.5375575604, 0.4286607050, 0.2485001680, 0.0000000000, 0.043…
## $ xs_06 <dbl> 0.5375576, 0.4286607, 0.4868872, 0.6357541, 0.4327004, 0.8672…
## $ response <dbl> 2.617991, 1.184632, 2.216626, 2.726715, 1.483323, 2.039279, 1…
# All numeric columns (inputs plus `response`), without the character columns
# and the row identifier.
df_continuous_all <- df_all %>%
  select(-region, -customer, -outcome, -rowid)

# Column names of each input family, keyed by prefix.
b <- colnames(select(df_all, starts_with("xb_")))
n <- colnames(select(df_all, starts_with("xn_")))
a <- colnames(select(df_all, starts_with("xa_")))
w <- colnames(select(df_all, starts_with("xw_")))
s <- colnames(select(df_all, starts_with("xs_")))

# Stack the xb_ columns into long format (variable name / value) for plotting.
xb <- df_all %>%
  select(starts_with("xb_")) %>%
  melt(variable.name = "X_Variables", value.name = "X_Distribution")
## No id variables; using all as measure variables
# Boxplots of all xb_ inputs on a shared axis.
ggplot(xb, aes(X_Variables, X_Distribution)) +
  geom_boxplot()

# One histogram per xb_ input (ggplot2's default bin count).
b1 <- ggplot(df_all, aes(x = xb_01)) + geom_histogram()
b2 <- ggplot(df_all, aes(x = xb_02)) + geom_histogram()
b3 <- ggplot(df_all, aes(x = xb_03)) + geom_histogram()
b4 <- ggplot(df_all, aes(x = xb_04)) + geom_histogram()
b5 <- ggplot(df_all, aes(x = xb_05)) + geom_histogram()
b6 <- ggplot(df_all, aes(x = xb_06)) + geom_histogram()
b7 <- ggplot(df_all, aes(x = xb_07)) + geom_histogram()
b8 <- ggplot(df_all, aes(x = xb_08)) + geom_histogram()

# Arrange the eight histograms in a 2 x 4 grid, labelled with the column names.
plot_grid(
  b1, b2, b3, b4, b5, b6, b7, b8,
  labels = b,
  ncol = 4,
  nrow = 2
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Stack the xn_ columns into long format (variable name / value) for plotting.
xn <- df_all %>%
  select(starts_with("xn_")) %>%
  melt(variable.name = "X_Variables", value.name = "X_Distribution")
## No id variables; using all as measure variables
# Boxplots of all xn_ inputs on a shared axis.
ggplot(xn, aes(X_Variables, X_Distribution)) +
  geom_boxplot()

# One histogram per xn_ input (ggplot2's default bin count).
n1 <- ggplot(df_all, aes(x = xn_01)) + geom_histogram()
n2 <- ggplot(df_all, aes(x = xn_02)) + geom_histogram()
n3 <- ggplot(df_all, aes(x = xn_03)) + geom_histogram()
n4 <- ggplot(df_all, aes(x = xn_04)) + geom_histogram()
n5 <- ggplot(df_all, aes(x = xn_05)) + geom_histogram()
n6 <- ggplot(df_all, aes(x = xn_06)) + geom_histogram()
n7 <- ggplot(df_all, aes(x = xn_07)) + geom_histogram()
n8 <- ggplot(df_all, aes(x = xn_08)) + geom_histogram()

# Arrange the eight histograms in a 2 x 4 grid, labelled with the column names.
plot_grid(
  n1, n2, n3, n4, n5, n6, n7, n8,
  labels = n,
  ncol = 4,
  nrow = 2
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Stack the xa_ columns into long format (variable name / value) for plotting.
xa <- df_all %>%
  select(starts_with("xa_")) %>%
  melt(variable.name = "X_Variables", value.name = "X_Distribution")
## No id variables; using all as measure variables
# Boxplots of all xa_ inputs on a shared axis.
ggplot(xa, aes(X_Variables, X_Distribution)) +
  geom_boxplot()

# One histogram per xa_ input; xa_01 uses 50 bins, the rest use the default.
a1 <- ggplot(df_all, aes(x = xa_01)) + geom_histogram(bins = 50)
a2 <- ggplot(df_all, aes(x = xa_02)) + geom_histogram()
a3 <- ggplot(df_all, aes(x = xa_03)) + geom_histogram()
a4 <- ggplot(df_all, aes(x = xa_04)) + geom_histogram()
a5 <- ggplot(df_all, aes(x = xa_05)) + geom_histogram()
a6 <- ggplot(df_all, aes(x = xa_06)) + geom_histogram()
a7 <- ggplot(df_all, aes(x = xa_07)) + geom_histogram()
a8 <- ggplot(df_all, aes(x = xa_08)) + geom_histogram()

# Arrange the eight histograms in a 2 x 4 grid, labelled with the column names.
plot_grid(
  a1, a2, a3, a4, a5, a6, a7, a8,
  labels = a,
  ncol = 4,
  nrow = 2
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Stack the xw_ columns into long format (variable name / value) for plotting.
xw <- df_all %>%
  select(starts_with("xw_")) %>%
  melt(variable.name = "X_Variables", value.name = "X_Distribution")
## No id variables; using all as measure variables
# Boxplots of all xw_ inputs on a shared axis.
ggplot(xw, aes(X_Variables, X_Distribution)) +
  geom_boxplot()

# One 55-bin histogram per xw_ input.
w1 <- ggplot(df_all, aes(x = xw_01)) + geom_histogram(bins = 55)
w2 <- ggplot(df_all, aes(x = xw_02)) + geom_histogram(bins = 55)
w3 <- ggplot(df_all, aes(x = xw_03)) + geom_histogram(bins = 55)

# Arrange the three histograms in a 2 x 2 grid, labelled with the column names.
plot_grid(
  w1, w2, w3,
  labels = w,
  ncol = 2,
  nrow = 2
)

# Stack the xs_ columns into long format (variable name / value) for plotting.
xs <- df_all %>%
  select(starts_with("xs_")) %>%
  melt(variable.name = "X_Variables", value.name = "X_Distribution")
## No id variables; using all as measure variables
# Boxplots of all xs_ inputs on a shared axis.
ggplot(xs, aes(X_Variables, X_Distribution)) +
  geom_boxplot()

# One histogram per xs_ input; xs_05 and xs_06 use 55 bins, the rest use the
# default.
s1 <- ggplot(df_all, aes(x = xs_01)) + geom_histogram()
s2 <- ggplot(df_all, aes(x = xs_02)) + geom_histogram()
s3 <- ggplot(df_all, aes(x = xs_03)) + geom_histogram()
s4 <- ggplot(df_all, aes(x = xs_04)) + geom_histogram()
s5 <- ggplot(df_all, aes(x = xs_05)) + geom_histogram(bins = 55)
s6 <- ggplot(df_all, aes(x = xs_06)) + geom_histogram(bins = 55)

# Arrange the six histograms in a 2 x 3 grid, labelled with the column names.
plot_grid(
  s1, s2, s3, s4, s5, s6,
  labels = s,
  ncol = 3,
  nrow = 2
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Distribution of the continuous response: 55-bin histogram, then a boxplot.
df_all %>%
  ggplot(aes(x = response)) +
  geom_histogram(bins = 55)

df_all %>%
  ggplot(aes(x = response)) +
  geom_boxplot()
Based on the figures above,
the variables whose names start with “xb_”, “xn_” and “xa_” all appear to follow approximately Gaussian distributions.
Among the variables whose names start with “xw_” and “xs_”, only xw_02, xw_03 and xs_05 do not look Gaussian, while the others appear approximately Gaussian.
The variable “response” does not look Gaussian; its distribution is positively skewed.
# Reshape the xb_ inputs to long format, keeping `outcome` as the id column.
Xb_outcome <- melt(
  data = df_all %>% select(starts_with("xb_"), outcome),
  id.vars = "outcome",
  variable.name = "Xb_Variables",
  value.name = "Xb_Distribution"
)

# Boxplots of every xb_ input, split by outcome class.
Xb_outcome %>%
  ggplot(mapping = aes(x = Xb_Variables, y = Xb_Distribution)) +
  geom_boxplot(aes(fill = outcome))

# Summary statistics of the xb_ inputs within each outcome class.
# all_of() states explicitly that `b` is an external character vector of
# column names, not a column of df_all; this removes the tidyselect
# ambiguity note.
df_all %>%
  select(all_of(b), outcome) %>%
  split(.$outcome) %>%
  map(summary)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(b)` instead of `b` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## $event
## xb_01 xb_02 xb_03 xb_04
## Min. :-4.000 Min. :-4.000 Min. :-7.0000 Min. :-2.0000
## 1st Qu.: 1.050 1st Qu.: 2.500 1st Qu.:-1.0000 1st Qu.: 0.5000
## Median : 2.571 Median : 5.000 Median : 0.0000 Median : 0.9722
## Mean : 2.336 Mean : 4.669 Mean : 0.1811 Mean : 0.8315
## 3rd Qu.: 3.400 3rd Qu.: 7.000 3rd Qu.: 1.5000 3rd Qu.: 1.2069
## Max. : 7.000 Max. :14.000 Max. : 7.0000 Max. : 2.3333
## xb_05 xb_06 xb_07 xb_08
## Min. :-2.0000 Min. :-2.000 Min. :-1.000 Min. :-4.0000
## 1st Qu.:-0.5000 1st Qu.: 1.000 1st Qu.: 1.396 1st Qu.:-0.7667
## Median : 0.0000 Median : 1.667 Median : 1.812 Median : 0.0000
## Mean : 0.0790 Mean : 1.750 Mean : 1.794 Mean :-0.1639
## 3rd Qu.: 0.6333 3rd Qu.: 2.500 3rd Qu.: 2.099 3rd Qu.: 0.3661
## Max. : 2.3333 Max. : 6.000 Max. : 5.000 Max. : 2.0000
## outcome
## Length:127
## Class :character
## Mode :character
##
##
##
##
## $non_event
## xb_01 xb_02 xb_03 xb_04
## Min. :-2.000 Min. :-2.000 Min. :-6.000 Min. :-1.0000
## 1st Qu.: 2.667 1st Qu.: 4.000 1st Qu.:-1.000 1st Qu.: 0.9383
## Median : 3.429 Median : 6.000 Median : 1.000 Median : 1.1974
## Mean : 3.618 Mean : 5.998 Mean : 1.456 Mean : 1.2273
## 3rd Qu.: 4.500 3rd Qu.: 8.000 3rd Qu.: 3.000 3rd Qu.: 1.5000
## Max. :14.000 Max. :15.000 Max. :14.000 Max. : 5.0000
## xb_05 xb_06 xb_07 xb_08
## Min. :-3.0000 Min. :-1.000 Min. :-1.000 Min. :-2.0000
## 1st Qu.:-0.2500 1st Qu.: 1.250 1st Qu.: 1.802 1st Qu.:-0.1667
## Median : 0.5000 Median : 2.000 Median : 2.000 Median : 0.2824
## Mean : 0.4839 Mean : 2.189 Mean : 2.167 Mean : 0.2993
## 3rd Qu.: 1.0000 3rd Qu.: 3.000 3rd Qu.: 2.500 3rd Qu.: 1.0000
## Max. : 5.0000 Max. : 9.000 Max. : 7.000 Max. : 5.0000
## outcome
## Length:550
## Class :character
## Mode :character
##
##
##
# Reshape the xb_ inputs to long format, keeping `region` as the id column.
Xb_region <- melt(
  data = df_all %>% select(starts_with("xb_"), region),
  id.vars = "region",
  variable.name = "Xb_Variables",
  value.name = "Xb_Distribution"
)

# Boxplots of every xb_ input, split by region.
Xb_region %>%
  ggplot(mapping = aes(x = Xb_Variables, y = Xb_Distribution)) +
  geom_boxplot(aes(fill = region))

# Summary statistics of the xb_ inputs within each region.
# all_of() states explicitly that `b` is an external character vector of
# column names, not a column of df_all.
df_all %>%
  select(all_of(b), region) %>%
  split(.$region) %>%
  map(summary)
## $XX
## xb_01 xb_02 xb_03 xb_04
## Min. :-1.000 Min. :-1.000 Min. :-6.0000 Min. :-0.3333
## 1st Qu.: 2.625 1st Qu.: 4.000 1st Qu.:-2.0000 1st Qu.: 0.9056
## Median : 3.250 Median : 7.000 Median : 0.0000 Median : 1.1039
## Mean : 3.354 Mean : 6.708 Mean : 0.3851 Mean : 1.1081
## 3rd Qu.: 4.000 3rd Qu.: 9.000 3rd Qu.: 2.0000 3rd Qu.: 1.2884
## Max. :12.000 Max. :15.000 Max. :12.0000 Max. : 4.0000
## xb_05 xb_06 xb_07 xb_08
## Min. :-3.00000 Min. :-0.3333 Min. :0.000 Min. :-2.0000
## 1st Qu.:-0.66667 1st Qu.: 1.5000 1st Qu.:1.714 1st Qu.:-0.1000
## Median : 0.00000 Median : 2.0000 Median :2.000 Median : 0.1606
## Mean : 0.04583 Mean : 2.4198 Mean :2.030 Mean : 0.1593
## 3rd Qu.: 0.75000 3rd Qu.: 3.0000 3rd Qu.:2.250 3rd Qu.: 0.5000
## Max. : 4.00000 Max. : 7.0000 Max. :7.000 Max. : 4.0000
## region
## Length:161
## Class :character
## Mode :character
##
##
##
##
## $YY
## xb_01 xb_02 xb_03 xb_04
## Min. :-2.000 Min. :-2.000 Min. :-7.00000 Min. :-0.5000
## 1st Qu.: 2.557 1st Qu.: 4.000 1st Qu.:-2.00000 1st Qu.: 0.8622
## Median : 3.231 Median : 7.000 Median : 0.00000 Median : 1.0528
## Mean : 3.194 Mean : 6.676 Mean :-0.01351 Mean : 1.0448
## 3rd Qu.: 3.980 3rd Qu.: 9.000 3rd Qu.: 1.75000 3rd Qu.: 1.2881
## Max. :10.000 Max. :15.000 Max. :10.00000 Max. : 3.0000
## xb_05 xb_06 xb_07 xb_08
## Min. :-2.50000 Min. :-0.500 Min. :0.000 Min. :-4.00000
## 1st Qu.:-0.66667 1st Qu.: 1.500 1st Qu.:1.719 1st Qu.:-0.20625
## Median : 0.00000 Median : 2.000 Median :2.000 Median : 0.09091
## Mean :-0.01128 Mean : 2.503 Mean :2.005 Mean : 0.10428
## 3rd Qu.: 0.65000 3rd Qu.: 3.500 3rd Qu.:2.250 3rd Qu.: 0.50000
## Max. : 3.00000 Max. : 9.000 Max. :5.000 Max. : 3.00000
## region
## Length:222
## Class :character
## Mode :character
##
##
##
##
## $ZZ
## xb_01 xb_02 xb_03 xb_04
## Min. :-4.000 Min. :-4.000 Min. :-4.000 Min. :-2.000
## 1st Qu.: 2.000 1st Qu.: 3.000 1st Qu.: 1.000 1st Qu.: 0.750
## Median : 3.208 Median : 4.500 Median : 2.000 Median : 1.292
## Mean : 3.528 Mean : 4.524 Mean : 2.602 Mean : 1.259
## 3rd Qu.: 5.000 3rd Qu.: 7.000 3rd Qu.: 4.000 3rd Qu.: 1.665
## Max. :14.000 Max. :14.000 Max. :14.000 Max. : 5.000
## xb_05 xb_06 xb_07 xb_08
## Min. :-2.0000 Min. :-2.000 Min. :-1.000 Min. :-4.0000
## 1st Qu.: 0.3333 1st Qu.: 1.000 1st Qu.: 1.667 1st Qu.:-0.5000
## Median : 1.0000 Median : 1.500 Median : 2.000 Median : 0.5000
## Mean : 0.9227 Mean : 1.636 Mean : 2.204 Mean : 0.3231
## 3rd Qu.: 1.5000 3rd Qu.: 2.000 3rd Qu.: 3.000 3rd Qu.: 1.0000
## Max. : 5.0000 Max. : 8.000 Max. : 6.000 Max. : 5.0000
## region
## Length:294
## Class :character
## Mode :character
##
##
##
# Reshape the xb_ inputs to long format, keeping `customer` as the id column.
Xb_customer <- melt(
  data = df_all %>% select(starts_with("xb_"), customer),
  id.vars = "customer",
  variable.name = "Xb_Variables",
  value.name = "Xb_Distribution"
)

# Boxplots of every xb_ input, split by customer.
Xb_customer %>%
  ggplot(mapping = aes(x = Xb_Variables, y = Xb_Distribution)) +
  geom_boxplot(aes(fill = customer))

# Summary statistics of the xb_ inputs within each customer group.
# Fix: this step previously selected `s` (the xs_ columns), a copy-paste slip
# in the xb_ section; it now selects `b`, matching the outcome and region
# summaries. all_of() marks `b` as an external character vector of names.
# NOTE: the recorded output that follows was produced by the old xs_ selection
# and must be regenerated by re-knitting.
df_all %>%
  select(all_of(b), customer) %>%
  split(.$customer) %>%
  map(summary)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(s)` instead of `s` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## $A
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.2177 Min. :-0.23440 Min. :-0.2177 Min. :0.04969
## 1st Qu.: 0.1355 1st Qu.: 0.05937 1st Qu.: 0.1431 1st Qu.:0.21687
## Median : 0.2445 Median : 0.16595 Median : 0.2584 Median :0.29383
## Mean : 0.2230 Mean : 0.17350 Mean : 0.2696 Mean :0.30833
## 3rd Qu.: 0.3194 3rd Qu.: 0.29588 3rd Qu.: 0.3645 3rd Qu.:0.36930
## Max. : 0.7216 Max. : 0.66528 Max. : 1.2095 Max. :0.68960
## xs_05 xs_06 customer
## Min. :0.04969 Min. :0.04969 Length:55
## 1st Qu.:0.20251 1st Qu.:0.21687 Class :character
## Median :0.23911 Median :0.30266 Mode :character
## Mean :0.27724 Mean :0.34653
## 3rd Qu.:0.30766 3rd Qu.:0.45327
## Max. :0.68960 Max. :1.22736
##
## $B
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1426 Min. :-0.73322 Min. :-0.1324 Min. :0.1173
## 1st Qu.: 0.1628 1st Qu.:-0.37820 1st Qu.: 0.3954 1st Qu.:0.2591
## Median : 0.1904 Median :-0.21077 Median : 0.5948 Median :0.2775
## Mean : 0.1890 Mean :-0.19880 Mean : 0.6143 Mean :0.2848
## 3rd Qu.: 0.2140 3rd Qu.:-0.05854 3rd Qu.: 0.8211 3rd Qu.:0.3044
## Max. : 0.4850 Max. : 0.48504 Max. : 1.4050 Max. :0.5376
## xs_05 xs_06 customer
## Min. :0.000000 Min. :0.1173 Length:52
## 1st Qu.:0.008229 1st Qu.:0.4458 Class :character
## Median :0.047146 Median :0.6086 Mode :character
## Mean :0.092405 Mean :0.6151
## 3rd Qu.:0.117775 3rd Qu.:0.8177
## Max. :0.537558 Max. :1.2413
##
## $D
## xs_01 xs_02 xs_03 xs_04
## Min. :0.08792 Min. :-0.64470 Min. :0.2432 Min. :0.1440
## 1st Qu.:0.21626 1st Qu.:-0.20842 1st Qu.:0.3987 1st Qu.:0.2887
## Median :0.24520 Median :-0.08711 Median :0.5365 Median :0.3247
## Mean :0.24986 Mean :-0.07659 Mean :0.6376 Mean :0.3383
## 3rd Qu.:0.26532 3rd Qu.: 0.05179 3rd Qu.:0.8572 3rd Qu.:0.3538
## Max. :0.44501 Max. : 0.44501 Max. :1.2029 Max. :0.6473
## xs_05 xs_06 customer
## Min. :0.003697 Min. :0.1710 Length:32
## 1st Qu.:0.051346 1st Qu.:0.4426 Class :character
## Median :0.093479 Median :0.6289 Mode :character
## Mean :0.155485 Mean :0.6669
## 3rd Qu.:0.193913 3rd Qu.:0.7897
## Max. :0.647317 Max. :1.3088
##
## $E
## xs_01 xs_02 xs_03 xs_04
## Min. :0.04066 Min. :-0.8959 Min. :0.1481 Min. :0.1767
## 1st Qu.:0.21094 1st Qu.:-0.3637 1st Qu.:0.5271 1st Qu.:0.2995
## Median :0.23238 Median :-0.1873 Median :0.7167 Median :0.3167
## Mean :0.22909 Mean :-0.2150 Mean :0.7367 Mean :0.3189
## 3rd Qu.:0.25847 3rd Qu.: 0.0000 3rd Qu.:0.8551 3rd Qu.:0.3325
## Max. :0.37722 Max. : 0.2734 Max. :1.7907 Max. :0.5171
## xs_05 xs_06 customer
## Min. :0.00000 Min. :0.2847 Length:35
## 1st Qu.:0.01384 1st Qu.:0.5446 Class :character
## Median :0.06259 Median :0.6837 Mode :character
## Mean :0.08784 Mean :0.6986
## 3rd Qu.:0.10509 3rd Qu.:0.8342
## Max. :0.44002 Max. :1.1698
##
## $G
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.3612 Min. :-0.3612 Min. :-0.3612 Min. :0.0000
## 1st Qu.: 0.1371 1st Qu.: 0.0000 1st Qu.: 0.1578 1st Qu.:0.2345
## Median : 0.2481 Median : 0.1218 Median : 0.3191 Median :0.2906
## Mean : 0.2211 Mean : 0.1274 Mean : 0.3285 Mean :0.3094
## 3rd Qu.: 0.3160 3rd Qu.: 0.2598 3rd Qu.: 0.4415 3rd Qu.:0.3624
## Max. : 0.5770 Max. : 0.4924 Max. : 1.2814 Max. :0.7498
## xs_05 xs_06 customer
## Min. :0.0000 Min. :0.0000 Length:113
## 1st Qu.:0.1449 1st Qu.:0.2749 Class :character
## Median :0.2228 Median :0.3810 Mode :character
## Mean :0.2428 Mean :0.3872
## 3rd Qu.:0.3200 3rd Qu.:0.4748
## Max. :0.6043 Max. :1.2703
##
## $K
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1929 Min. :-0.45588 Min. :-0.1929 Min. :0.02511
## 1st Qu.: 0.1099 1st Qu.: 0.00000 1st Qu.: 0.1874 1st Qu.:0.23880
## Median : 0.2545 Median : 0.10806 Median : 0.3628 Median :0.29826
## Mean : 0.2198 Mean : 0.09952 Mean : 0.3385 Mean :0.30084
## 3rd Qu.: 0.3143 3rd Qu.: 0.21411 3rd Qu.: 0.4946 3rd Qu.:0.37644
## Max. : 0.5294 Max. : 0.52943 Max. : 0.7334 Max. :0.59517
## xs_05 xs_06 customer
## Min. :0.02511 Min. :0.02511 Length:38
## 1st Qu.:0.10866 1st Qu.:0.28099 Class :character
## Median :0.21802 Median :0.36633 Mode :character
## Mean :0.23749 Mean :0.39190
## 3rd Qu.:0.31286 3rd Qu.:0.50790
## Max. :0.59517 Max. :0.87500
##
## $M
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1119 Min. :-0.2357830 Min. :-0.1119 Min. :0.01455
## 1st Qu.: 0.1298 1st Qu.: 0.0005822 1st Qu.: 0.1843 1st Qu.:0.23238
## Median : 0.2209 Median : 0.1301042 Median : 0.2947 Median :0.27622
## Mean : 0.2341 Mean : 0.1373717 Mean : 0.3271 Mean :0.30171
## 3rd Qu.: 0.2986 3rd Qu.: 0.2610634 3rd Qu.: 0.4167 3rd Qu.:0.35778
## Max. : 0.7548 Max. : 0.6910501 Max. : 1.1690 Max. :0.67638
## xs_05 xs_06 customer
## Min. :0.01455 Min. :0.01455 Length:71
## 1st Qu.:0.13840 1st Qu.:0.24491 Class :character
## Median :0.19951 Median :0.38587 Mode :character
## Mean :0.23664 Mean :0.38138
## 3rd Qu.:0.31684 3rd Qu.:0.48507
## Max. :0.67638 Max. :0.90264
##
## $Other
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.08179 Min. :-0.58926 Min. :-0.05311 Min. :0.04405
## 1st Qu.: 0.14678 1st Qu.:-0.18671 1st Qu.: 0.26019 1st Qu.:0.24536
## Median : 0.20141 Median :-0.01860 Median : 0.41334 Median :0.28067
## Mean : 0.20129 Mean :-0.02167 Mean : 0.44045 Mean :0.28703
## 3rd Qu.: 0.24808 3rd Qu.: 0.14493 3rd Qu.: 0.61393 3rd Qu.:0.32161
## Max. : 0.67685 Max. : 0.67685 Max. : 1.44532 Max. :0.61849
## xs_05 xs_06 customer
## Min. :0.00000 Min. :0.04405 Length:245
## 1st Qu.:0.06906 1st Qu.:0.33333 Class :character
## Median :0.12681 Median :0.46359 Mode :character
## Mean :0.15362 Mean :0.47928
## 3rd Qu.:0.20775 3rd Qu.:0.60838
## Max. :0.56237 Max. :1.14684
##
## $Q
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1789 Min. :-0.37219 Min. :-0.1789 Min. :0.1014
## 1st Qu.: 0.1369 1st Qu.:-0.01576 1st Qu.: 0.2241 1st Qu.:0.2392
## Median : 0.2275 Median : 0.09962 Median : 0.3267 Median :0.2934
## Mean : 0.2243 Mean : 0.08974 Mean : 0.3623 Mean :0.3324
## 3rd Qu.: 0.3195 3rd Qu.: 0.19656 3rd Qu.: 0.5145 3rd Qu.:0.3504
## Max. : 0.5247 Max. : 0.52468 Max. : 1.0159 Max. :0.8988
## xs_05 xs_06 customer
## Min. :0.02799 Min. :0.1014 Length:36
## 1st Qu.:0.12687 1st Qu.:0.3115 Class :character
## Median :0.21997 Median :0.4006 Mode :character
## Mean :0.24190 Mean :0.4428
## 3rd Qu.:0.27744 3rd Qu.:0.5609
## Max. :0.89883 Max. :0.8988
# Reshape the xn_ inputs to long format, keeping `outcome` as the id column.
Xn_outcome <- melt(
  data = df_all %>% select(starts_with("xn_"), outcome),
  id.vars = "outcome",
  variable.name = "Xn_Variables",
  value.name = "Xn_Distribution"
)

# Boxplots of every xn_ input, split by outcome class.
Xn_outcome %>%
  ggplot(mapping = aes(x = Xn_Variables, y = Xn_Distribution)) +
  geom_boxplot(aes(fill = outcome))

# Summary statistics of the xn_ inputs within each outcome class.
# all_of() states explicitly that `n` is an external character vector of
# column names, not a column of df_all; this removes the tidyselect
# ambiguity note.
df_all %>%
  select(all_of(n), outcome) %>%
  split(.$outcome) %>%
  map(summary)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(n)` instead of `n` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## $event
## xn_01 xn_02 xn_03 xn_04
## Min. :-4.00000 Min. :-4.000 Min. :-6.000 Min. :-4.00000
## 1st Qu.:-1.00000 1st Qu.:-0.500 1st Qu.:-3.000 1st Qu.:-0.35417
## Median : 0.33333 Median : 2.000 Median :-2.000 Median : 0.12500
## Mean : 0.01976 Mean : 2.071 Mean :-2.126 Mean :-0.01121
## 3rd Qu.: 1.10238 3rd Qu.: 4.000 3rd Qu.:-1.000 3rd Qu.: 0.44097
## Max. : 2.33333 Max. : 8.000 Max. : 1.000 Max. : 1.00000
## xn_05 xn_06 xn_07 xn_08
## Min. :-4.0000 Min. :-4.0000 Min. :-4.0000 Min. :-4.0000
## 1st Qu.:-1.0000 1st Qu.:-0.1000 1st Qu.: 0.8333 1st Qu.:-1.1944
## Median :-1.0000 Median : 1.0000 Median : 1.0000 Median :-0.8000
## Mean :-0.8656 Mean : 0.8053 Mean : 0.8639 Mean :-0.9634
## 3rd Qu.:-0.3667 3rd Qu.: 1.7083 3rd Qu.: 1.3333 3rd Qu.:-0.3632
## Max. : 0.5000 Max. : 4.0000 Max. : 2.5000 Max. : 0.5000
## outcome
## Length:127
## Class :character
## Mode :character
##
##
##
##
## $non_event
## xn_01 xn_02 xn_03 xn_04
## Min. :-3.500 Min. :-3.000 Min. :-7.000000 Min. :-1.0000
## 1st Qu.: 1.000 1st Qu.: 2.000 1st Qu.:-2.000000 1st Qu.: 0.4333
## Median : 1.857 Median : 4.000 Median : 0.000000 Median : 0.6752
## Mean : 1.913 Mean : 4.033 Mean :-0.003636 Mean : 0.7458
## 3rd Qu.: 2.650 3rd Qu.: 6.000 3rd Qu.: 2.000000 3rd Qu.: 1.0000
## Max. :10.000 Max. :13.000 Max. :10.000000 Max. : 5.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-3.000000 Min. :-1.000 Min. :-1.000 Min. :-3.0000
## 1st Qu.:-1.000000 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.:-0.6667
## Median : 0.000000 Median : 1.367 Median : 1.500 Median :-0.1854
## Mean : 0.004892 Mean : 1.634 Mean : 1.532 Mean :-0.1063
## 3rd Qu.: 0.750000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 0.3333
## Max. : 5.000000 Max. : 7.000 Max. : 5.000 Max. : 5.0000
## outcome
## Length:550
## Class :character
## Mode :character
##
##
##
# Reshape the xn_ inputs to long format, keeping `region` as the id column.
Xn_region <- melt(
  data = df_all %>% select(starts_with("xn_"), region),
  id.vars = "region",
  variable.name = "Xn_Variables",
  value.name = "Xn_Distribution"
)

# Boxplots of every xn_ input, split by region.
Xn_region %>%
  ggplot(mapping = aes(x = Xn_Variables, y = Xn_Distribution)) +
  geom_boxplot(aes(fill = region))

# Summary statistics of the xn_ inputs within each region.
# all_of() states explicitly that `n` is an external character vector of
# column names, not a column of df_all.
df_all %>%
  select(all_of(n), region) %>%
  split(.$region) %>%
  map(summary)
## $XX
## xn_01 xn_02 xn_03 xn_04
## Min. :-2.500 Min. :-2.000 Min. :-6.000 Min. :-1.0000
## 1st Qu.: 1.000 1st Qu.: 2.000 1st Qu.:-3.000 1st Qu.: 0.3312
## Median : 1.667 Median : 4.000 Median :-1.000 Median : 0.5655
## Mean : 1.588 Mean : 4.621 Mean :-1.087 Mean : 0.5772
## 3rd Qu.: 2.286 3rd Qu.: 6.000 3rd Qu.: 0.000 3rd Qu.: 0.8600
## Max. :10.000 Max. :12.000 Max. :10.000 Max. : 3.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-3.0000 Min. :-1.000 Min. :-1.000 Min. :-3.0000
## 1st Qu.:-1.0000 1st Qu.: 1.000 1st Qu.: 1.067 1st Qu.:-0.7500
## Median :-0.5000 Median : 1.667 Median : 1.400 Median :-0.3750
## Mean :-0.5039 Mean : 1.736 Mean : 1.440 Mean :-0.3311
## 3rd Qu.: 0.0000 3rd Qu.: 2.500 3rd Qu.: 1.765 3rd Qu.: 0.0000
## Max. : 3.0000 Max. : 6.000 Max. : 4.000 Max. : 3.0000
## region
## Length:161
## Class :character
## Mode :character
##
##
##
##
## $YY
## xn_01 xn_02 xn_03 xn_04
## Min. :-3.500 Min. :-3.000 Min. :-7.000 Min. :-2.0000
## 1st Qu.: 1.000 1st Qu.: 3.000 1st Qu.:-3.000 1st Qu.: 0.3864
## Median : 1.667 Median : 5.000 Median :-2.000 Median : 0.6122
## Mean : 1.605 Mean : 4.662 Mean :-1.324 Mean : 0.6022
## 3rd Qu.: 2.231 3rd Qu.: 7.000 3rd Qu.: 1.000 3rd Qu.: 0.8504
## Max. : 6.250 Max. :13.000 Max. : 6.000 Max. : 3.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-3.0000 Min. :-2.000 Min. :-2.000 Min. :-3.0000
## 1st Qu.:-1.0000 1st Qu.: 1.000 1st Qu.: 1.118 1st Qu.:-0.6667
## Median :-0.6667 Median : 1.750 Median : 1.434 Median :-0.2967
## Mean :-0.4687 Mean : 1.958 Mean : 1.414 Mean :-0.2939
## 3rd Qu.: 0.2375 3rd Qu.: 3.000 3rd Qu.: 1.667 3rd Qu.: 0.0000
## Max. : 3.0000 Max. : 7.000 Max. : 3.250 Max. : 3.0000
## region
## Length:222
## Class :character
## Mode :character
##
##
##
##
## $ZZ
## xn_01 xn_02 xn_03 xn_04
## Min. :-4.000 Min. :-4.000 Min. :-5.0000 Min. :-4.00000
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.:-1.0000 1st Qu.: 0.02708
## Median : 1.333 Median : 2.000 Median : 1.0000 Median : 0.55556
## Mean : 1.506 Mean : 2.388 Mean : 0.6701 Mean : 0.61960
## 3rd Qu.: 2.788 3rd Qu.: 4.000 3rd Qu.: 2.0000 3rd Qu.: 1.00000
## Max. : 9.000 Max. : 9.000 Max. : 9.0000 Max. : 5.00000
## xn_05 xn_06 xn_07 xn_08
## Min. :-4.0000 Min. :-4.0000 Min. :-4.000 Min. :-4.0000
## 1st Qu.:-0.3333 1st Qu.: 0.4464 1st Qu.: 1.000 1st Qu.:-1.0000
## Median : 0.2000 Median : 1.0000 Median : 1.127 Median : 0.0000
## Mean : 0.2652 Mean : 0.9761 Mean : 1.382 Mean :-0.2118
## 3rd Qu.: 1.0000 3rd Qu.: 1.5000 3rd Qu.: 2.000 3rd Qu.: 0.7292
## Max. : 5.0000 Max. : 6.0000 Max. : 5.000 Max. : 5.0000
## region
## Length:294
## Class :character
## Mode :character
##
##
##
# Reshape the xn_ inputs to long format, keeping `customer` as the id column.
Xn_customer <- melt(
  data = df_all %>% select(starts_with("xn_"), customer),
  id.vars = "customer",
  variable.name = "Xn_Variables",
  value.name = "Xn_Distribution"
)

# Boxplots of every xn_ input, split by customer.
Xn_customer %>%
  ggplot(mapping = aes(x = Xn_Variables, y = Xn_Distribution)) +
  geom_boxplot(aes(fill = customer))

# Summary statistics of the xn_ inputs within each customer group.
# all_of() states explicitly that `n` is an external character vector of
# column names, not a column of df_all.
df_all %>%
  select(all_of(n), customer) %>%
  split(.$customer) %>%
  map(summary)
## $A
## xn_01 xn_02 xn_03 xn_04
## Min. :-4.000 Min. :-4.000 Min. :-4.0000 Min. :-1.0000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.:-1.0000 1st Qu.: 0.0000
## Median : 1.000 Median : 2.000 Median : 0.0000 Median : 0.4375
## Mean : 1.389 Mean : 1.855 Mean : 0.9091 Mean : 0.4984
## 3rd Qu.: 3.000 3rd Qu.: 3.500 3rd Qu.: 3.0000 3rd Qu.: 1.0000
## Max. : 8.000 Max. : 8.000 Max. : 8.0000 Max. : 4.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-1.5000 Min. :-1.0000 Min. :-1.000 Min. :-3.000
## 1st Qu.:-0.3333 1st Qu.: 0.0000 1st Qu.: 1.000 1st Qu.:-1.000
## Median : 0.0000 Median : 0.8333 Median : 1.000 Median :-0.500
## Mean : 0.2986 Mean : 0.6865 Mean : 1.408 Mean :-0.403
## 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 2.000 3rd Qu.: 0.000
## Max. : 4.0000 Max. : 4.0000 Max. : 5.000 Max. : 4.000
## customer
## Length:55
## Class :character
## Mode :character
##
##
##
##
## $B
## xn_01 xn_02 xn_03 xn_04
## Min. :-2.000 Min. :-2.000 Min. :-6.000 Min. :-0.3333
## 1st Qu.: 1.394 1st Qu.: 4.000 1st Qu.:-4.250 1st Qu.: 0.5056
## Median : 1.743 Median : 6.000 Median :-3.000 Median : 0.6084
## Mean : 1.687 Mean : 6.058 Mean :-2.346 Mean : 0.6357
## 3rd Qu.: 2.000 3rd Qu.: 8.000 3rd Qu.:-0.750 3rd Qu.: 0.7351
## Max. : 3.000 Max. :12.000 Max. : 3.000 Max. : 2.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-3.0000 Min. :-0.3333 Min. :1.000 Min. :-1.4000
## 1st Qu.:-1.5000 1st Qu.: 1.6250 1st Qu.:1.331 1st Qu.:-0.5000
## Median :-1.0000 Median : 2.2500 Median :1.438 Median :-0.3342
## Mean :-0.9087 Mean : 2.5353 Mean :1.525 Mean :-0.3376
## 3rd Qu.:-0.2500 3rd Qu.: 3.0000 3rd Qu.:1.645 3rd Qu.:-0.2167
## Max. : 2.0000 Max. : 6.0000 Max. :3.000 Max. : 2.0000
## customer
## Length:52
## Class :character
## Mode :character
##
##
##
##
## $D
## xn_01 xn_02 xn_03 xn_04
## Min. :-2.000 Min. :-2.000 Min. :-4.0000 Min. :-1.0000
## 1st Qu.: 1.750 1st Qu.: 3.750 1st Qu.:-3.0000 1st Qu.: 0.6842
## Median : 2.435 Median : 6.000 Median :-2.0000 Median : 0.8678
## Mean : 2.367 Mean : 5.906 Mean :-0.8125 Mean : 0.7852
## 3rd Qu.: 2.958 3rd Qu.: 8.250 3rd Qu.: 1.0000 3rd Qu.: 0.9983
## Max. :10.000 Max. :11.000 Max. :10.0000 Max. : 2.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-3.0000 Min. :-1.000 Min. :0.000 Min. :-2.00000
## 1st Qu.:-1.0000 1st Qu.: 1.458 1st Qu.:1.408 1st Qu.:-0.50000
## Median :-0.8333 Median : 2.000 Median :1.793 Median :-0.14086
## Mean :-0.4292 Mean : 2.248 Mean :1.677 Mean :-0.11340
## 3rd Qu.: 0.5000 3rd Qu.: 3.000 3rd Qu.:1.954 3rd Qu.: 0.08659
## Max. : 2.0000 Max. : 6.000 Max. :3.000 Max. : 2.00000
## customer
## Length:32
## Class :character
## Mode :character
##
##
##
##
## $E
## xn_01 xn_02 xn_03 xn_04
## Min. :-0.2222 Min. : 1.000 Min. :-5.000 Min. :-0.1278
## 1st Qu.: 1.7857 1st Qu.: 5.000 1st Qu.:-3.000 1st Qu.: 0.7114
## Median : 2.1250 Median : 7.000 Median :-2.000 Median : 0.8005
## Mean : 2.0871 Mean : 6.629 Mean :-1.657 Mean : 0.7949
## 3rd Qu.: 2.4435 3rd Qu.: 8.500 3rd Qu.: 0.000 3rd Qu.: 0.9271
## Max. : 3.7143 Max. :10.000 Max. : 2.000 Max. : 1.5000
## xn_05 xn_06 xn_07 xn_08
## Min. :-3.000 Min. :0.600 Min. :0.8889 Min. :-1.33333
## 1st Qu.:-1.417 1st Qu.:2.000 1st Qu.:1.4276 1st Qu.:-0.23611
## Median :-1.000 Median :2.500 Median :1.6863 Median :-0.12500
## Mean :-0.761 Mean :2.717 Mean :1.6102 Mean :-0.08433
## 3rd Qu.: 0.000 3rd Qu.:3.000 3rd Qu.:1.8036 3rd Qu.: 0.03258
## Max. : 1.000 Max. :6.000 Max. :2.2222 Max. : 1.00000
## customer
## Length:35
## Class :character
## Mode :character
##
##
##
##
## $G
## xn_01 xn_02 xn_03 xn_04
## Min. :-4.000 Min. :-4.000 Min. :-5.0000 Min. :-2.0000
## 1st Qu.: 0.600 1st Qu.: 1.000 1st Qu.:-1.0000 1st Qu.: 0.2000
## Median : 1.167 Median : 2.000 Median : 0.0000 Median : 0.5000
## Mean : 1.571 Mean : 2.743 Mean : 0.5398 Mean : 0.6193
## 3rd Qu.: 2.500 3rd Qu.: 4.000 3rd Qu.: 2.0000 3rd Qu.: 1.0000
## Max. : 9.000 Max. : 9.000 Max. : 9.0000 Max. : 5.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-3.000 Min. :-2.00 Min. :-2.000 Min. :-4.0000
## 1st Qu.:-0.500 1st Qu.: 0.50 1st Qu.: 1.000 1st Qu.:-1.0000
## Median : 0.000 Median : 1.00 Median : 1.200 Median :-0.3333
## Mean : 0.182 Mean : 1.08 Mean : 1.387 Mean :-0.2241
## 3rd Qu.: 1.000 3rd Qu.: 1.50 3rd Qu.: 2.000 3rd Qu.: 0.5000
## Max. : 5.000 Max. : 5.00 Max. : 5.000 Max. : 5.0000
## customer
## Length:113
## Class :character
## Mode :character
##
##
##
##
## $K
## xn_01 xn_02 xn_03 xn_04
## Min. :-2.000 Min. :-2.000 Min. :-3 Min. :-1.0000
## 1st Qu.: 0.000 1st Qu.: 0.250 1st Qu.:-1 1st Qu.: 0.0000
## Median : 1.000 Median : 2.000 Median : 0 Median : 0.3964
## Mean : 1.042 Mean : 2.132 Mean : 0 Mean : 0.4873
## 3rd Qu.: 2.000 3rd Qu.: 4.000 3rd Qu.: 1 3rd Qu.: 0.8125
## Max. : 4.000 Max. : 7.000 Max. : 4 Max. : 4.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-1.0000 Min. :-1.0000 Min. :-1.000 Min. :-3.0000
## 1st Qu.:-0.6250 1st Qu.: 0.0625 1st Qu.: 1.000 1st Qu.:-1.0000
## Median : 0.0000 Median : 1.0000 Median : 1.000 Median :-0.2679
## Mean : 0.1189 Mean : 0.8268 Mean : 1.108 Mean :-0.2270
## 3rd Qu.: 0.5000 3rd Qu.: 1.0000 3rd Qu.: 1.438 3rd Qu.: 0.0000
## Max. : 4.0000 Max. : 4.0000 Max. : 4.000 Max. : 4.0000
## customer
## Length:38
## Class :character
## Mode :character
##
##
##
##
## $M
## xn_01 xn_02 xn_03 xn_04
## Min. :-4.000 Min. :-4.000 Min. :-5.0000 Min. :-4.0000
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.:-1.0000 1st Qu.: 0.3187
## Median : 2.000 Median : 3.000 Median : 1.0000 Median : 0.7500
## Mean : 1.745 Mean : 2.634 Mean : 0.7887 Mean : 0.6278
## 3rd Qu.: 3.000 3rd Qu.: 4.500 3rd Qu.: 2.5000 3rd Qu.: 1.0417
## Max. : 7.000 Max. : 7.000 Max. : 7.0000 Max. : 2.8333
## xn_05 xn_06 xn_07 xn_08
## Min. :-4.0000 Min. :-4.0000 Min. :-4.00 Min. :-4.0000
## 1st Qu.:-0.3333 1st Qu.: 0.5000 1st Qu.: 1.00 1st Qu.:-1.0000
## Median : 0.3333 Median : 1.0000 Median : 1.50 Median :-0.3333
## Mean : 0.2207 Mean : 0.9871 Mean : 1.52 Mean :-0.3305
## 3rd Qu.: 1.0000 3rd Qu.: 1.5000 3rd Qu.: 2.00 3rd Qu.: 0.5833
## Max. : 2.5000 Max. : 6.0000 Max. : 4.00 Max. : 2.0000
## customer
## Length:71
## Class :character
## Mode :character
##
##
##
##
## $Other
## xn_01 xn_02 xn_03 xn_04
## Min. :-3.5000 Min. :-2.00 Min. :-7.0000 Min. :-2.0000
## 1st Qu.: 0.8571 1st Qu.: 2.00 1st Qu.:-2.0000 1st Qu.: 0.3215
## Median : 1.5000 Median : 4.00 Median :-1.0000 Median : 0.5383
## Mean : 1.4631 Mean : 3.98 Mean :-0.9265 Mean : 0.5846
## 3rd Qu.: 2.0000 3rd Qu.: 6.00 3rd Qu.: 1.0000 3rd Qu.: 0.8526
## Max. : 8.0000 Max. :13.00 Max. : 8.0000 Max. : 3.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-3.0000 Min. :-2.00 Min. :-2.000 Min. :-3.0000
## 1st Qu.:-1.0000 1st Qu.: 1.00 1st Qu.: 1.000 1st Qu.:-0.6875
## Median :-0.5000 Median : 1.50 Median : 1.347 Median :-0.3627
## Mean :-0.3158 Mean : 1.65 Mean : 1.356 Mean :-0.2706
## 3rd Qu.: 0.3333 3rd Qu.: 2.00 3rd Qu.: 1.667 3rd Qu.: 0.0000
## Max. : 3.0000 Max. : 7.00 Max. : 3.000 Max. : 3.0000
## customer
## Length:245
## Class :character
## Mode :character
##
##
##
##
## $Q
## xn_01 xn_02 xn_03 xn_04
## Min. :-3.000 Min. :-3.00 Min. :-4.0000 Min. :-1.0000
## 1st Qu.: 0.000 1st Qu.: 0.75 1st Qu.:-2.0000 1st Qu.: 0.0050
## Median : 1.167 Median : 3.00 Median :-0.5000 Median : 0.4792
## Mean : 1.177 Mean : 2.50 Mean :-0.1667 Mean : 0.5292
## 3rd Qu.: 2.000 3rd Qu.: 4.00 3rd Qu.: 1.0000 3rd Qu.: 1.0266
## Max. : 6.250 Max. :10.00 Max. : 4.0000 Max. : 3.0000
## xn_05 xn_06 xn_07 xn_08
## Min. :-1.00000 Min. :-1.000 Min. :0.000 Min. :-3.0000
## 1st Qu.:-0.66667 1st Qu.: 0.250 1st Qu.:1.000 1st Qu.:-1.0000
## Median :-0.16667 Median : 1.000 Median :1.200 Median : 0.0000
## Mean : 0.01594 Mean : 1.017 Mean :1.284 Mean :-0.3009
## 3rd Qu.: 0.57500 3rd Qu.: 1.625 3rd Qu.:1.667 3rd Qu.: 0.3750
## Max. : 3.00000 Max. : 3.000 Max. :3.250 Max. : 3.0000
## customer
## Length:36
## Class :character
## Mode :character
##
##
##
# Condition the xa_ inputs on the binary `outcome`: grouped boxplots followed
# by per-outcome numeric summaries.

# Long format: one row per (outcome, xa_ variable, value) so that all xa_
# columns can share a single x axis.
Xa_outcome <- melt(
  data = df_all %>% select(starts_with("xa_"), "outcome"),
  id.vars = "outcome",
  variable.name = "Xa_Variables",
  value.name = "Xa_Distribution"
)

# One box per outcome level within each xa_ variable.
Xa_outcome %>%
  ggplot(mapping = aes(x = Xa_Variables, y = Xa_Distribution)) +
  geom_boxplot(aes(fill = outcome))

# `a` is an external vector defined outside this chunk (presumably the xa_
# column names -- confirm). A bare `a` triggers tidyselect's "external vector
# in selections is ambiguous" note; all_of() makes the intent explicit.
df_all %>%
  select(all_of(a), outcome) %>%
  split(.$outcome) %>%
  map(summary)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(a)` instead of `a` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## $event
## xa_01 xa_02 xa_03 xa_04
## Min. :-3.000 Min. :-3.00 Min. :-7.000 Min. :-2.000
## 1st Qu.: 4.817 1st Qu.: 7.00 1st Qu.:-1.000 1st Qu.: 1.736
## Median : 6.400 Median :11.00 Median : 2.000 Median : 2.581
## Mean : 6.161 Mean :11.17 Mean : 1.953 Mean : 2.375
## 3rd Qu.: 8.000 3rd Qu.:15.00 3rd Qu.: 4.000 3rd Qu.: 3.099
## Max. :16.000 Max. :26.00 Max. :16.000 Max. : 6.750
## xa_05 xa_06 xa_07 xa_08
## Min. :-6.0000 Min. :-2.000 Min. :-2.000 Min. :-5.0000
## 1st Qu.:-0.2917 1st Qu.: 2.875 1st Qu.: 3.500 1st Qu.:-0.5000
## Median : 0.6667 Median : 4.000 Median : 4.088 Median : 0.7500
## Mean : 0.7251 Mean : 4.464 Mean : 4.215 Mean : 0.5234
## 3rd Qu.: 1.8286 3rd Qu.: 6.000 3rd Qu.: 5.000 3rd Qu.: 1.6753
## Max. : 6.0000 Max. :15.000 Max. :11.000 Max. : 6.0000
## outcome
## Length:127
## Class :character
## Mode :character
##
##
##
##
## $non_event
## xa_01 xa_02 xa_03 xa_04
## Min. :-2.000 Min. :-2.00 Min. :-9.000 Min. :-2.000
## 1st Qu.: 6.500 1st Qu.: 8.25 1st Qu.: 0.000 1st Qu.: 2.388
## Median : 8.156 Median :13.50 Median : 4.000 Median : 3.000
## Mean : 8.515 Mean :13.72 Mean : 4.271 Mean : 3.076
## 3rd Qu.:10.036 3rd Qu.:18.00 3rd Qu.: 7.000 3rd Qu.: 3.577
## Max. :35.000 Max. :38.00 Max. :35.000 Max. :12.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-8.00000 Min. :-2.000 Min. :-2.000 Min. :-5.0000
## 1st Qu.: 0.04167 1st Qu.: 3.000 1st Qu.: 4.000 1st Qu.: 0.6254
## Median : 1.50000 Median : 4.500 Median : 4.718 Median : 1.2386
## Mean : 1.53068 Mean : 5.308 Mean : 4.810 Mean : 1.3823
## 3rd Qu.: 3.00000 3rd Qu.: 7.000 3rd Qu.: 5.500 3rd Qu.: 2.0000
## Max. :12.00000 Max. :23.000 Max. :13.000 Max. :12.0000
## outcome
## Length:550
## Class :character
## Mode :character
##
##
##
# Condition the xa_ inputs on `region`: grouped boxplots followed by
# per-region numeric summaries.

# Long format: one row per (region, xa_ variable, value) so that all xa_
# columns can share a single x axis.
Xa_region <- melt(
  data = df_all %>% select(starts_with("xa_"), region),
  id.vars = "region",
  variable.name = "Xa_Variables",
  value.name = "Xa_Distribution"
)

# One box per region within each xa_ variable.
Xa_region %>%
  ggplot(mapping = aes(x = Xa_Variables, y = Xa_Distribution)) +
  geom_boxplot(aes(fill = region))

# `a` is an external vector defined outside this chunk (presumably the xa_
# column names -- confirm). all_of() selects by its contents explicitly
# instead of relying on the ambiguous bare-name lookup.
df_all %>%
  select(all_of(a), region) %>%
  split(.$region) %>%
  map(summary)
## $XX
## xa_01 xa_02 xa_03 xa_04
## Min. :-2.000 Min. :-2.00 Min. :-9.000 Min. :-2.000
## 1st Qu.: 6.677 1st Qu.:11.00 1st Qu.:-1.000 1st Qu.: 2.448
## Median : 8.000 Median :15.00 Median : 2.000 Median : 2.866
## Mean : 8.093 Mean :15.26 Mean : 2.199 Mean : 2.958
## 3rd Qu.: 9.667 3rd Qu.:21.00 3rd Qu.: 4.000 3rd Qu.: 3.302
## Max. :23.000 Max. :32.00 Max. :23.000 Max. :10.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-8.0000 Min. :-2.000 Min. :-2.000 Min. :-3.0000
## 1st Qu.:-0.6667 1st Qu.: 3.500 1st Qu.: 4.000 1st Qu.: 0.6429
## Median : 0.6667 Median : 5.500 Median : 4.590 Median : 1.0667
## Mean : 0.7295 Mean : 5.934 Mean : 4.666 Mean : 1.2323
## 3rd Qu.: 2.0000 3rd Qu.: 8.000 3rd Qu.: 5.167 3rd Qu.: 1.7857
## Max. :10.0000 Max. :21.000 Max. :12.000 Max. :10.0000
## region
## Length:161
## Class :character
## Mode :character
##
##
##
##
## $YY
## xa_01 xa_02 xa_03 xa_04
## Min. :-2.000 Min. :-2.00 Min. :-9.000 Min. :-2.000
## 1st Qu.: 6.600 1st Qu.:10.00 1st Qu.:-1.000 1st Qu.: 2.337
## Median : 8.000 Median :16.00 Median : 1.000 Median : 2.845
## Mean : 7.813 Mean :15.51 Mean : 1.802 Mean : 2.740
## 3rd Qu.: 9.200 3rd Qu.:21.00 3rd Qu.: 5.000 3rd Qu.: 3.193
## Max. :17.000 Max. :38.00 Max. :17.000 Max. : 7.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-8.0000 Min. :-2.000 Min. :-2.000 Min. :-4.0000
## 1st Qu.:-0.5000 1st Qu.: 3.375 1st Qu.: 4.000 1st Qu.: 0.3438
## Median : 0.5000 Median : 5.292 Median : 4.648 Median : 1.0000
## Mean : 0.5413 Mean : 6.210 Mean : 4.571 Mean : 0.9398
## 3rd Qu.: 2.0000 3rd Qu.: 8.000 3rd Qu.: 5.162 3rd Qu.: 1.7362
## Max. : 7.0000 Max. :23.000 Max. :11.000 Max. : 7.0000
## region
## Length:222
## Class :character
## Mode :character
##
##
##
##
## $ZZ
## xa_01 xa_02 xa_03 xa_04
## Min. :-3.000 Min. :-3.00 Min. :-6.000 Min. :-1.500
## 1st Qu.: 5.083 1st Qu.: 6.00 1st Qu.: 3.000 1st Qu.: 2.000
## Median : 7.583 Median :10.00 Median : 5.000 Median : 3.000
## Mean : 8.258 Mean :10.43 Mean : 6.269 Mean : 3.092
## 3rd Qu.:11.000 3rd Qu.:14.00 3rd Qu.: 9.000 3rd Qu.: 3.759
## Max. :35.000 Max. :35.00 Max. :35.000 Max. :12.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-3.000 Min. :-1.500 Min. :-1.000 Min. :-5.000
## 1st Qu.: 1.175 1st Qu.: 2.500 1st Qu.: 3.375 1st Qu.: 0.000
## Median : 2.250 Median : 3.667 Median : 4.536 Median : 1.667
## Mean : 2.369 Mean : 3.919 Mean : 4.814 Mean : 1.428
## 3rd Qu.: 3.237 3rd Qu.: 5.000 3rd Qu.: 6.000 3rd Qu.: 2.500
## Max. :12.000 Max. :12.000 Max. :13.000 Max. :12.000
## region
## Length:294
## Class :character
## Mode :character
##
##
##
# Condition the xa_ inputs on `customer`: grouped boxplots followed by
# per-customer numeric summaries.

# Long format: one row per (customer, xa_ variable, value) so that all xa_
# columns can share a single x axis.
Xa_customer <- melt(
  data = df_all %>% select(starts_with("xa_"), customer),
  id.vars = "customer",
  variable.name = "Xa_Variables",
  value.name = "Xa_Distribution"
)

# One box per customer level within each xa_ variable.
Xa_customer %>%
  ggplot(mapping = aes(x = Xa_Variables, y = Xa_Distribution)) +
  geom_boxplot(aes(fill = customer))

# `a` is an external vector defined outside this chunk (presumably the xa_
# column names -- confirm). all_of() selects by its contents explicitly
# instead of relying on the ambiguous bare-name lookup.
df_all %>%
  select(all_of(a), customer) %>%
  split(.$customer) %>%
  map(summary)
## $A
## xa_01 xa_02 xa_03 xa_04
## Min. :-3.000 Min. :-3.000 Min. :-6.000 Min. :-1.500
## 1st Qu.: 5.167 1st Qu.: 6.500 1st Qu.: 3.000 1st Qu.: 2.000
## Median : 7.000 Median :11.000 Median : 7.000 Median : 3.000
## Mean : 8.677 Mean : 9.636 Mean : 7.618 Mean : 3.077
## 3rd Qu.:12.833 3rd Qu.:14.000 3rd Qu.:12.500 3rd Qu.: 3.875
## Max. :20.000 Max. :20.000 Max. :20.000 Max. : 7.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-2.000 Min. :-1.500 Min. :-1.000 Min. :-5.000
## 1st Qu.: 1.500 1st Qu.: 2.333 1st Qu.: 3.250 1st Qu.: 0.000
## Median : 3.000 Median : 3.667 Median : 4.500 Median : 2.000
## Mean : 2.708 Mean : 3.415 Mean : 4.944 Mean : 1.286
## 3rd Qu.: 3.750 3rd Qu.: 4.000 3rd Qu.: 6.000 3rd Qu.: 2.750
## Max. : 7.000 Max. : 9.000 Max. :10.000 Max. : 7.000
## customer
## Length:55
## Class :character
## Mode :character
##
##
##
##
## $B
## xa_01 xa_02 xa_03 xa_04
## Min. : 2.000 Min. : 2.00 Min. :-9.0000 Min. :0.400
## 1st Qu.: 6.780 1st Qu.:13.75 1st Qu.:-4.0000 1st Qu.:2.525
## Median : 7.705 Median :17.50 Median :-0.5000 Median :2.754
## Mean : 7.510 Mean :17.21 Mean :-0.3462 Mean :2.866
## 3rd Qu.: 8.213 3rd Qu.:22.00 3rd Qu.: 2.2500 3rd Qu.:3.006
## Max. :12.000 Max. :32.00 Max. :12.0000 Max. :7.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-8.0000 Min. : 0.400 Min. :2.000 Min. :-3.0000
## 1st Qu.:-1.7500 1st Qu.: 5.375 1st Qu.:4.220 1st Qu.: 0.7317
## Median :-0.1250 Median : 7.250 Median :4.511 Median : 0.9789
## Mean :-0.3446 Mean : 8.089 Mean :4.580 Mean : 1.0846
## 3rd Qu.: 1.0000 3rd Qu.:10.167 3rd Qu.:4.743 3rd Qu.: 1.2065
## Max. : 7.0000 Max. :23.000 Max. :9.000 Max. : 7.0000
## customer
## Length:52
## Class :character
## Mode :character
##
##
##
##
## $D
## xa_01 xa_02 xa_03 xa_04
## Min. : 4.500 Min. : 6.00 Min. :-5.000 Min. :1.602
## 1st Qu.: 8.377 1st Qu.:12.00 1st Qu.:-2.000 1st Qu.:3.010
## Median : 9.178 Median :18.00 Median : 2.500 Median :3.141
## Mean : 9.508 Mean :18.56 Mean : 2.625 Mean :3.205
## 3rd Qu.:10.121 3rd Qu.:23.00 3rd Qu.: 4.000 3rd Qu.:3.290
## Max. :23.000 Max. :32.00 Max. :23.000 Max. :7.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-3.0000 Min. : 2.000 Min. :3.167 Min. :-3.000
## 1st Qu.:-1.1250 1st Qu.: 3.875 1st Qu.:4.925 1st Qu.: 1.011
## Median : 1.0000 Median : 6.600 Median :5.020 Median : 1.371
## Mean : 0.6534 Mean : 6.575 Mean :4.971 Mean : 1.481
## 3rd Qu.: 2.0500 3rd Qu.: 8.000 3rd Qu.:5.290 3rd Qu.: 2.000
## Max. : 7.0000 Max. :16.000 Max. :7.000 Max. : 7.000
## customer
## Length:32
## Class :character
## Mode :character
##
##
##
##
## $E
## xa_01 xa_02 xa_03 xa_04
## Min. : 3.500 Min. : 7.00 Min. :-9.0000 Min. :1.083
## 1st Qu.: 7.404 1st Qu.:16.50 1st Qu.:-3.0000 1st Qu.:2.869
## Median : 8.670 Median :21.00 Median :-1.0000 Median :3.055
## Mean : 8.437 Mean :19.89 Mean :-0.4286 Mean :3.026
## 3rd Qu.: 9.367 3rd Qu.:25.00 3rd Qu.: 1.5000 3rd Qu.:3.271
## Max. :13.333 Max. :32.00 Max. : 8.0000 Max. :5.333
## xa_05 xa_06 xa_07 xa_08
## Min. :-3.000 Min. : 2.667 Min. :2.667 Min. :-1.500
## 1st Qu.:-1.833 1st Qu.: 5.333 1st Qu.:4.373 1st Qu.: 1.016
## Median :-0.500 Median : 7.000 Median :4.810 Median : 1.343
## Mean :-0.290 Mean : 7.645 Mean :4.764 Mean : 1.305
## 3rd Qu.: 1.000 3rd Qu.: 9.250 3rd Qu.:5.022 3rd Qu.: 1.495
## Max. : 4.000 Max. :17.000 Max. :6.667 Max. : 4.500
## customer
## Length:35
## Class :character
## Mode :character
##
##
##
##
## $G
## xa_01 xa_02 xa_03 xa_04
## Min. :-3.000 Min. :-3.00 Min. :-6.000 Min. :-2.000
## 1st Qu.: 5.500 1st Qu.: 7.00 1st Qu.: 2.000 1st Qu.: 2.193
## Median : 7.750 Median :10.00 Median : 5.000 Median : 3.000
## Mean : 8.299 Mean :10.75 Mean : 5.929 Mean : 2.997
## 3rd Qu.:10.500 3rd Qu.:14.00 3rd Qu.: 9.000 3rd Qu.: 3.750
## Max. :35.000 Max. :35.00 Max. :35.000 Max. :10.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-3.000 Min. :-2.000 Min. :-2.000 Min. :-5.000
## 1st Qu.: 0.800 1st Qu.: 2.667 1st Qu.: 3.500 1st Qu.: 0.000
## Median : 2.000 Median : 4.000 Median : 4.500 Median : 1.333
## Mean : 2.117 Mean : 4.062 Mean : 4.771 Mean : 1.236
## 3rd Qu.: 3.000 3rd Qu.: 5.000 3rd Qu.: 5.800 3rd Qu.: 2.333
## Max. :10.000 Max. :12.000 Max. :13.000 Max. :10.000
## customer
## Length:113
## Class :character
## Mode :character
##
##
##
##
## $K
## xa_01 xa_02 xa_03 xa_04
## Min. : 1.500 Min. : 2.00 Min. :-2.000 Min. :0.5833
## 1st Qu.: 4.250 1st Qu.: 4.25 1st Qu.: 2.000 1st Qu.:1.9375
## Median : 7.000 Median : 8.50 Median : 4.000 Median :3.0000
## Mean : 6.825 Mean :10.16 Mean : 4.263 Mean :2.7753
## 3rd Qu.: 8.438 3rd Qu.:14.00 3rd Qu.: 5.000 3rd Qu.:3.3229
## Max. :15.000 Max. :27.00 Max. :15.000 Max. :7.0000
## xa_05 xa_06 xa_07 xa_08
## Min. :-0.400 Min. :0.6667 Min. :2.000 Min. :-5.0000
## 1st Qu.: 1.000 1st Qu.:2.1667 1st Qu.:3.036 1st Qu.: 0.6562
## Median : 1.583 Median :3.8333 Median :4.100 Median : 1.1250
## Mean : 2.007 Mean :3.6386 Mean :4.474 Mean : 1.2862
## 3rd Qu.: 3.000 3rd Qu.:4.6250 3rd Qu.:5.500 3rd Qu.: 2.0000
## Max. : 7.000 Max. :9.0000 Max. :9.000 Max. : 7.0000
## customer
## Length:38
## Class :character
## Mode :character
##
##
##
##
## $M
## xa_01 xa_02 xa_03 xa_04
## Min. : 1.000 Min. : 1.00 Min. :-5.000 Min. : 0.500
## 1st Qu.: 5.762 1st Qu.: 7.00 1st Qu.: 3.000 1st Qu.: 2.000
## Median : 8.000 Median :10.00 Median : 6.000 Median : 3.000
## Mean : 8.832 Mean :11.37 Mean : 6.437 Mean : 3.091
## 3rd Qu.:12.000 3rd Qu.:15.00 3rd Qu.: 9.000 3rd Qu.: 3.782
## Max. :22.000 Max. :26.00 Max. :22.000 Max. :12.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-2.000 Min. : 0.500 Min. : 2.000 Min. :-3.000
## 1st Qu.: 1.250 1st Qu.: 2.667 1st Qu.: 3.000 1st Qu.: 0.000
## Median : 2.143 Median : 3.500 Median : 4.667 Median : 1.636
## Mean : 2.218 Mean : 4.099 Mean : 4.833 Mean : 1.432
## 3rd Qu.: 3.000 3rd Qu.: 5.100 3rd Qu.: 6.000 3rd Qu.: 2.417
## Max. :12.000 Max. :12.000 Max. :12.000 Max. :12.000
## customer
## Length:71
## Class :character
## Mode :character
##
##
##
##
## $Other
## xa_01 xa_02 xa_03 xa_04
## Min. :-2.000 Min. :-2.00 Min. :-8.0 Min. :-2.000
## 1st Qu.: 6.250 1st Qu.: 9.00 1st Qu.: 0.0 1st Qu.: 2.252
## Median : 7.962 Median :14.00 Median : 2.0 Median : 2.778
## Mean : 7.748 Mean :14.01 Mean : 2.8 Mean : 2.868
## 3rd Qu.: 9.000 3rd Qu.:19.00 3rd Qu.: 5.0 3rd Qu.: 3.323
## Max. :21.000 Max. :38.00 Max. :21.0 Max. :10.000
## xa_05 xa_06 xa_07 xa_08
## Min. :-4.000 Min. :-2.000 Min. :-2.000 Min. :-4.0000
## 1st Qu.: 0.000 1st Qu.: 3.000 1st Qu.: 3.786 1st Qu.: 0.3333
## Median : 1.000 Median : 5.000 Median : 4.500 Median : 1.0238
## Mean : 1.062 Mean : 5.545 Mean : 4.540 Mean : 1.1551
## 3rd Qu.: 2.000 3rd Qu.: 7.000 3rd Qu.: 5.208 3rd Qu.: 1.9868
## Max. :10.000 Max. :21.000 Max. :12.000 Max. :10.0000
## customer
## Length:245
## Class :character
## Mode :character
##
##
##
##
## $Q
## xa_01 xa_02 xa_03 xa_04
## Min. :-2.000 Min. :-2.00 Min. :-2.000 Min. :-0.6667
## 1st Qu.: 4.812 1st Qu.: 7.00 1st Qu.: 1.000 1st Qu.: 1.7500
## Median : 7.433 Median :12.00 Median : 4.000 Median : 3.0500
## Mean : 7.660 Mean :11.42 Mean : 4.222 Mean : 2.7903
## 3rd Qu.:11.125 3rd Qu.:16.00 3rd Qu.: 7.000 3rd Qu.: 3.6719
## Max. :14.000 Max. :28.00 Max. :14.000 Max. : 7.0000
## xa_05 xa_06 xa_07 xa_08
## Min. :-1.0000 Min. :-0.6667 Min. : 1.000 Min. :-4.0000
## 1st Qu.: 0.3333 1st Qu.: 3.0000 1st Qu.: 4.000 1st Qu.:-0.5000
## Median : 1.5000 Median : 4.0000 Median : 5.000 Median : 1.0000
## Mean : 1.6403 Mean : 4.2495 Mean : 5.018 Mean : 0.9276
## 3rd Qu.: 2.8000 3rd Qu.: 5.7500 3rd Qu.: 6.000 3rd Qu.: 1.8500
## Max. : 7.0000 Max. :14.0000 Max. :11.000 Max. : 7.0000
## customer
## Length:36
## Class :character
## Mode :character
##
##
##
# Condition the xw_ inputs on the binary `outcome`: grouped boxplots followed
# by per-outcome numeric summaries.

# Long format: one row per (outcome, xw_ variable, value) so that all xw_
# columns can share a single x axis.
Xw_outcome <- melt(
  data = df_all %>% select(starts_with("xw_"), "outcome"),
  id.vars = "outcome",
  variable.name = "Xw_Variables",
  value.name = "Xw_Distribution"
)

# One box per outcome level within each xw_ variable.
Xw_outcome %>%
  ggplot(mapping = aes(x = Xw_Variables, y = Xw_Distribution)) +
  geom_boxplot(aes(fill = outcome))

# `w` is an external vector defined outside this chunk (presumably the xw_
# column names -- confirm). A bare `w` triggers tidyselect's "external vector
# in selections is ambiguous" note; all_of() makes the intent explicit.
df_all %>%
  select(all_of(w), outcome) %>%
  split(.$outcome) %>%
  map(summary)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(w)` instead of `w` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## $event
## xw_01 xw_02 xw_03 outcome
## Min. : 14.00 Min. : 0.00 Min. : 14.0 Length:127
## 1st Qu.: 43.85 1st Qu.: 10.50 1st Qu.: 65.5 Class :character
## Median : 56.85 Median : 25.00 Median : 94.0 Mode :character
## Mean : 57.59 Mean : 30.93 Mean : 82.1
## 3rd Qu.: 68.69 3rd Qu.: 45.00 3rd Qu.:101.0
## Max. :102.00 Max. :102.00 Max. :110.0
##
## $non_event
## xw_01 xw_02 xw_03 outcome
## Min. : 9.00 Min. : 0.00 Min. : 9.00 Length:550
## 1st Qu.: 44.50 1st Qu.: 9.00 1st Qu.: 57.00 Class :character
## Median : 57.79 Median : 24.00 Median : 91.50 Mode :character
## Mean : 56.88 Mean : 32.08 Mean : 78.37
## 3rd Qu.: 67.46 3rd Qu.: 51.00 3rd Qu.:101.00
## Max. :108.00 Max. :108.00 Max. :113.00
# Condition the xw_ inputs on `region`: grouped boxplots followed by
# per-region numeric summaries.

# Long format: one row per (region, xw_ variable, value) so that all xw_
# columns can share a single x axis.
Xw_region <- melt(
  data = df_all %>% select(starts_with("xw_"), region),
  id.vars = "region",
  variable.name = "Xw_Variables",
  value.name = "Xw_Distribution"
)

# One box per region within each xw_ variable.
Xw_region %>%
  ggplot(mapping = aes(x = Xw_Variables, y = Xw_Distribution)) +
  geom_boxplot(aes(fill = region))

# `w` is an external vector defined outside this chunk (presumably the xw_
# column names -- confirm). all_of() selects by its contents explicitly
# instead of relying on the ambiguous bare-name lookup.
df_all %>%
  select(all_of(w), region) %>%
  split(.$region) %>%
  map(summary)
## $XX
## xw_01 xw_02 xw_03 region
## Min. : 10.50 Min. : 0.00 Min. : 14.00 Length:161
## 1st Qu.: 50.37 1st Qu.: 0.00 1st Qu.: 82.00 Class :character
## Median : 58.55 Median : 16.00 Median : 98.00 Mode :character
## Mean : 58.31 Mean : 24.11 Mean : 87.91
## 3rd Qu.: 65.92 3rd Qu.: 31.00 3rd Qu.:103.00
## Max. :108.00 Max. :108.00 Max. :110.00
##
## $YY
## xw_01 xw_02 xw_03 region
## Min. : 11.00 Min. : 0.00 Min. : 11.00 Length:222
## 1st Qu.: 51.54 1st Qu.: 0.00 1st Qu.: 83.00 Class :character
## Median : 59.01 Median : 14.50 Median : 98.00 Mode :character
## Mean : 58.58 Mean : 23.04 Mean : 88.18
## 3rd Qu.: 66.76 3rd Qu.: 35.00 3rd Qu.:103.00
## Max. :103.00 Max. :103.00 Max. :113.00
##
## $ZZ
## xw_01 xw_02 xw_03 region
## Min. : 9.00 Min. : 0.00 Min. : 9.00 Length:294
## 1st Qu.: 37.50 1st Qu.: 20.00 1st Qu.: 41.25 Class :character
## Median : 53.22 Median : 38.50 Median : 69.00 Mode :character
## Mean : 55.13 Mean : 42.78 Mean : 67.35
## 3rd Qu.: 71.78 3rd Qu.: 62.00 3rd Qu.: 95.00
## Max. :104.00 Max. :104.00 Max. :110.00
# Condition the xw_ inputs on `customer`: grouped boxplots followed by
# per-customer numeric summaries.

# Long format: one row per (customer, xw_ variable, value) so that all xw_
# columns can share a single x axis.
Xw_customer <- melt(
  data = df_all %>% select(starts_with("xw_"), customer),
  id.vars = "customer",
  variable.name = "Xw_Variables",
  value.name = "Xw_Distribution"
)

# One box per customer level within each xw_ variable.
Xw_customer %>%
  ggplot(mapping = aes(x = Xw_Variables, y = Xw_Distribution)) +
  geom_boxplot(aes(fill = customer))

# `w` is an external vector defined outside this chunk (presumably the xw_
# column names -- confirm). all_of() selects by its contents explicitly
# instead of relying on the ambiguous bare-name lookup.
df_all %>%
  select(all_of(w), customer) %>%
  split(.$customer) %>%
  map(summary)
## $A
## xw_01 xw_02 xw_03 customer
## Min. : 11.00 Min. : 7.00 Min. : 11.00 Length:55
## 1st Qu.: 38.00 1st Qu.: 34.50 1st Qu.: 38.50 Class :character
## Median : 58.50 Median : 51.00 Median : 66.00 Mode :character
## Mean : 58.14 Mean : 52.91 Mean : 63.76
## 3rd Qu.: 76.83 3rd Qu.: 75.00 3rd Qu.: 79.50
## Max. :104.00 Max. :104.00 Max. :106.00
##
## $B
## xw_01 xw_02 xw_03 customer
## Min. :11.00 Min. : 0.00 Min. : 11.00 Length:52
## 1st Qu.:59.45 1st Qu.: 0.00 1st Qu.: 96.00 Class :character
## Median :62.19 Median : 6.50 Median :103.00 Mode :character
## Mean :60.50 Mean :15.71 Mean : 93.42
## 3rd Qu.:66.31 3rd Qu.:23.25 3rd Qu.:106.00
## Max. :94.00 Max. :94.00 Max. :112.00
##
## $D
## xw_01 xw_02 xw_03 customer
## Min. : 23.00 Min. : 0.00 Min. : 23.00 Length:32
## 1st Qu.: 51.22 1st Qu.: 0.00 1st Qu.: 90.00 Class :character
## Median : 55.60 Median : 13.00 Median :103.00 Mode :character
## Mean : 57.89 Mean : 21.56 Mean : 91.47
## 3rd Qu.: 64.85 3rd Qu.: 26.50 3rd Qu.:106.00
## Max. :106.00 Max. :106.00 Max. :109.00
##
## $E
## xw_01 xw_02 xw_03 customer
## Min. :28.00 Min. : 0.000 Min. : 33.00 Length:35
## 1st Qu.:46.16 1st Qu.: 0.000 1st Qu.: 98.00 Class :character
## Median :53.15 Median : 0.000 Median :100.00 Mode :character
## Mean :51.07 Mean : 7.486 Mean : 95.06
## 3rd Qu.:55.93 3rd Qu.:12.000 3rd Qu.:103.50
## Max. :70.00 Max. :62.000 Max. :113.00
##
## $G
## xw_01 xw_02 xw_03 customer
## Min. : 14.00 Min. : 0.00 Min. : 14.00 Length:113
## 1st Qu.: 41.80 1st Qu.: 20.00 1st Qu.: 47.00 Class :character
## Median : 53.74 Median : 36.00 Median : 82.00 Mode :character
## Mean : 56.19 Mean : 40.88 Mean : 71.23
## 3rd Qu.: 67.50 3rd Qu.: 56.00 3rd Qu.: 97.00
## Max. :103.00 Max. :103.00 Max. :110.00
##
## $K
## xw_01 xw_02 xw_03 customer
## Min. :12.00 Min. : 0.00 Min. : 16.00 Length:38
## 1st Qu.:37.12 1st Qu.:15.25 1st Qu.: 49.25 Class :character
## Median :45.50 Median :25.00 Median : 65.00 Mode :character
## Mean :49.09 Mean :33.89 Mean : 66.66
## 3rd Qu.:56.75 3rd Qu.:54.75 3rd Qu.: 92.75
## Max. :93.00 Max. :93.00 Max. :104.00
##
## $M
## xw_01 xw_02 xw_03 customer
## Min. : 11.75 Min. : 0.00 Min. : 13.00 Length:71
## 1st Qu.: 39.00 1st Qu.: 20.50 1st Qu.: 43.00 Class :character
## Median : 64.00 Median : 40.00 Median : 84.00 Mode :character
## Mean : 60.04 Mean : 45.99 Mean : 71.82
## 3rd Qu.: 82.17 3rd Qu.: 68.00 3rd Qu.: 97.00
## Max. :102.00 Max. :102.00 Max. :109.00
##
## $Other
## xw_01 xw_02 xw_03 customer
## Min. : 9.00 Min. : 0.00 Min. : 9.00 Length:245
## 1st Qu.: 49.50 1st Qu.: 0.00 1st Qu.: 73.00 Class :character
## Median : 58.83 Median : 18.00 Median : 96.00 Mode :character
## Mean : 57.57 Mean : 25.87 Mean : 84.31
## 3rd Qu.: 67.22 3rd Qu.: 38.00 3rd Qu.:102.00
## Max. :108.00 Max. :108.00 Max. :110.00
##
## $Q
## xw_01 xw_02 xw_03 customer
## Min. :14.00 Min. : 0.00 Min. : 14.00 Length:36
## 1st Qu.:36.38 1st Qu.:15.50 1st Qu.: 47.75 Class :character
## Median :53.67 Median :32.00 Median : 76.00 Mode :character
## Mean :56.52 Mean :38.47 Mean : 71.53
## 3rd Qu.:78.38 3rd Qu.:55.75 3rd Qu.: 96.50
## Max. :98.00 Max. :98.00 Max. :105.00
# Condition the xs_ inputs on the binary `outcome`: grouped boxplots followed
# by per-outcome numeric summaries.

# Long format: one row per (outcome, xs_ variable, value) so that all xs_
# columns can share a single x axis.
Xs_outcome <- melt(
  data = df_all %>% select(starts_with("xs_"), "outcome"),
  id.vars = "outcome",
  variable.name = "Xs_Variables",
  value.name = "Xs_Distribution"
)

# One box per outcome level within each xs_ variable.
Xs_outcome %>%
  ggplot(mapping = aes(x = Xs_Variables, y = Xs_Distribution)) +
  geom_boxplot(aes(fill = outcome))

# `s` is defined outside this chunk (presumably the xs_ column names -- confirm).
# all_of() marks it as an external vector, so the selection stays unambiguous
# even if df_all ever gains a column called `s`.
df_all %>%
  select(all_of(s), outcome) %>%
  split(.$outcome) %>%
  map(summary)
## $event
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.2177 Min. :-0.43347 Min. :-0.2177 Min. :0.0000
## 1st Qu.: 0.1073 1st Qu.:-0.14100 1st Qu.: 0.1759 1st Qu.:0.2455
## Median : 0.1715 Median : 0.00000 Median : 0.3402 Median :0.2859
## Mean : 0.1660 Mean :-0.01581 Mean : 0.3603 Mean :0.3103
## 3rd Qu.: 0.2250 3rd Qu.: 0.12984 3rd Qu.: 0.4968 3rd Qu.:0.3501
## Max. : 0.5247 Max. : 0.52468 Max. : 1.1833 Max. :0.8988
## xs_05 xs_06 outcome
## Min. :0.00000 Min. :0.0000 Length:127
## 1st Qu.:0.08714 1st Qu.:0.3334 Class :character
## Median :0.15195 Median :0.4727 Mode :character
## Mean :0.18920 Mean :0.4815
## 3rd Qu.:0.26305 3rd Qu.:0.5708
## Max. :0.89883 Max. :1.2703
##
## $non_event
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.3612 Min. :-0.89585 Min. :-0.3612 Min. :0.02511
## 1st Qu.: 0.1589 1st Qu.:-0.14308 1st Qu.: 0.2578 1st Qu.:0.24250
## Median : 0.2254 Median : 0.04786 Median : 0.3976 Median :0.29147
## Mean : 0.2261 Mean : 0.03107 Mean : 0.4388 Mean :0.29900
## 3rd Qu.: 0.2945 3rd Qu.: 0.22046 3rd Qu.: 0.6097 3rd Qu.:0.34113
## Max. : 0.7548 Max. : 0.69105 Max. : 1.7907 Max. :0.68960
## xs_05 xs_06 outcome
## Min. :0.00000 Min. :0.02511 Length:550
## 1st Qu.:0.07771 1st Qu.:0.29971 Class :character
## Median :0.16581 Median :0.42122 Mode :character
## Mean :0.18850 Mean :0.46320
## 3rd Qu.:0.26384 3rd Qu.:0.60198
## Max. :0.68960 Max. :1.30883
# Condition the xs_ inputs on `region`: grouped boxplots followed by
# per-region numeric summaries.

# Long format: one row per (region, xs_ variable, value) so that all xs_
# columns can share a single x axis.
Xs_region <- melt(
  data = df_all %>% select(starts_with("xs_"), region),
  id.vars = "region",
  variable.name = "Xs_Variables",
  value.name = "Xs_Distribution"
)

# One box per region within each xs_ variable.
Xs_region %>%
  ggplot(mapping = aes(x = Xs_Variables, y = Xs_Distribution)) +
  geom_boxplot(aes(fill = region))

# `s` is defined outside this chunk (presumably the xs_ column names -- confirm).
# all_of() marks it as an external vector, so the selection stays unambiguous
# even if df_all ever gains a column called `s`.
df_all %>%
  select(all_of(s), region) %>%
  split(.$region) %>%
  map(summary)
## $XX
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.09905 Min. :-0.73322 Min. :-0.09905 Min. :0.08563
## 1st Qu.: 0.16001 1st Qu.:-0.19427 1st Qu.: 0.30751 1st Qu.:0.25467
## Median : 0.21005 Median :-0.05646 Median : 0.43543 Median :0.28865
## Mean : 0.20989 Mean :-0.05512 Mean : 0.50137 Mean :0.29742
## 3rd Qu.: 0.25971 3rd Qu.: 0.14512 3rd Qu.: 0.66385 3rd Qu.:0.32932
## Max. : 0.67685 Max. : 0.67685 Max. : 1.40500 Max. :0.74984
## xs_05 xs_06 region
## Min. :0.00000 Min. :0.09744 Length:161
## 1st Qu.:0.06645 1st Qu.:0.35745 Class :character
## Median :0.11691 Median :0.51720 Mode :character
## Mean :0.14660 Mean :0.54494
## 3rd Qu.:0.20407 3rd Qu.:0.69807
## Max. :0.64732 Max. :1.30883
##
## $YY
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1789 Min. :-0.89585 Min. :-0.1789 Min. :0.09682
## 1st Qu.: 0.1562 1st Qu.:-0.24315 1st Qu.: 0.3128 1st Qu.:0.25516
## Median : 0.2070 Median :-0.07242 Median : 0.4865 Median :0.28727
## Mean : 0.2054 Mean :-0.07333 Mean : 0.5125 Mean :0.30026
## 3rd Qu.: 0.2521 3rd Qu.: 0.07709 3rd Qu.: 0.6922 3rd Qu.:0.32621
## Max. : 0.6283 Max. : 0.62832 Max. : 1.7907 Max. :0.89883
## xs_05 xs_06 region
## Min. :0.00000 Min. :0.09682 Length:222
## 1st Qu.:0.04787 1st Qu.:0.38598 Class :character
## Median :0.10632 Median :0.51745 Mode :character
## Mean :0.14386 Mean :0.53339
## 3rd Qu.:0.19564 3rd Qu.:0.68155
## Max. :0.89883 Max. :1.17974
##
## $ZZ
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.3612 Min. :-0.45588 Min. :-0.3612 Min. :0.0000
## 1st Qu.: 0.1344 1st Qu.: 0.01633 1st Qu.: 0.1679 1st Qu.:0.2268
## Median : 0.2393 Median : 0.14129 Median : 0.2949 Median :0.2927
## Mean : 0.2247 Mean : 0.13685 Mean : 0.3150 Mean :0.3038
## 3rd Qu.: 0.3126 3rd Qu.: 0.26076 3rd Qu.: 0.4330 3rd Qu.:0.3692
## Max. : 0.7548 Max. : 0.69105 Max. : 1.2814 Max. :0.6896
## xs_05 xs_06 region
## Min. :0.0000 Min. :0.0000 Length:294
## 1st Qu.:0.1516 1st Qu.:0.2448 Class :character
## Median :0.2240 Median :0.3538 Mode :character
## Mean :0.2455 Mean :0.3734
## 3rd Qu.:0.3179 3rd Qu.:0.4802
## Max. :0.6896 Max. :1.2274
# Condition the xs_ inputs on `customer`: grouped boxplots followed by
# per-customer numeric summaries.

# Long format: one row per (customer, xs_ variable, value) so that all xs_
# columns can share a single x axis.
Xs_customer <- melt(
  data = df_all %>% select(starts_with("xs_"), customer),
  id.vars = "customer",
  variable.name = "Xs_Variables",
  value.name = "Xs_Distribution"
)

# One box per customer level within each xs_ variable.
Xs_customer %>%
  ggplot(mapping = aes(x = Xs_Variables, y = Xs_Distribution)) +
  geom_boxplot(aes(fill = customer))

# `s` is defined outside this chunk (presumably the xs_ column names -- confirm).
# all_of() marks it as an external vector, so the selection stays unambiguous
# even if df_all ever gains a column called `s`.
df_all %>%
  select(all_of(s), customer) %>%
  split(.$customer) %>%
  map(summary)
## $A
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.2177 Min. :-0.23440 Min. :-0.2177 Min. :0.04969
## 1st Qu.: 0.1355 1st Qu.: 0.05937 1st Qu.: 0.1431 1st Qu.:0.21687
## Median : 0.2445 Median : 0.16595 Median : 0.2584 Median :0.29383
## Mean : 0.2230 Mean : 0.17350 Mean : 0.2696 Mean :0.30833
## 3rd Qu.: 0.3194 3rd Qu.: 0.29588 3rd Qu.: 0.3645 3rd Qu.:0.36930
## Max. : 0.7216 Max. : 0.66528 Max. : 1.2095 Max. :0.68960
## xs_05 xs_06 customer
## Min. :0.04969 Min. :0.04969 Length:55
## 1st Qu.:0.20251 1st Qu.:0.21687 Class :character
## Median :0.23911 Median :0.30266 Mode :character
## Mean :0.27724 Mean :0.34653
## 3rd Qu.:0.30766 3rd Qu.:0.45327
## Max. :0.68960 Max. :1.22736
##
## $B
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1426 Min. :-0.73322 Min. :-0.1324 Min. :0.1173
## 1st Qu.: 0.1628 1st Qu.:-0.37820 1st Qu.: 0.3954 1st Qu.:0.2591
## Median : 0.1904 Median :-0.21077 Median : 0.5948 Median :0.2775
## Mean : 0.1890 Mean :-0.19880 Mean : 0.6143 Mean :0.2848
## 3rd Qu.: 0.2140 3rd Qu.:-0.05854 3rd Qu.: 0.8211 3rd Qu.:0.3044
## Max. : 0.4850 Max. : 0.48504 Max. : 1.4050 Max. :0.5376
## xs_05 xs_06 customer
## Min. :0.000000 Min. :0.1173 Length:52
## 1st Qu.:0.008229 1st Qu.:0.4458 Class :character
## Median :0.047146 Median :0.6086 Mode :character
## Mean :0.092405 Mean :0.6151
## 3rd Qu.:0.117775 3rd Qu.:0.8177
## Max. :0.537558 Max. :1.2413
##
## $D
## xs_01 xs_02 xs_03 xs_04
## Min. :0.08792 Min. :-0.64470 Min. :0.2432 Min. :0.1440
## 1st Qu.:0.21626 1st Qu.:-0.20842 1st Qu.:0.3987 1st Qu.:0.2887
## Median :0.24520 Median :-0.08711 Median :0.5365 Median :0.3247
## Mean :0.24986 Mean :-0.07659 Mean :0.6376 Mean :0.3383
## 3rd Qu.:0.26532 3rd Qu.: 0.05179 3rd Qu.:0.8572 3rd Qu.:0.3538
## Max. :0.44501 Max. : 0.44501 Max. :1.2029 Max. :0.6473
## xs_05 xs_06 customer
## Min. :0.003697 Min. :0.1710 Length:32
## 1st Qu.:0.051346 1st Qu.:0.4426 Class :character
## Median :0.093479 Median :0.6289 Mode :character
## Mean :0.155485 Mean :0.6669
## 3rd Qu.:0.193913 3rd Qu.:0.7897
## Max. :0.647317 Max. :1.3088
##
## $E
## xs_01 xs_02 xs_03 xs_04
## Min. :0.04066 Min. :-0.8959 Min. :0.1481 Min. :0.1767
## 1st Qu.:0.21094 1st Qu.:-0.3637 1st Qu.:0.5271 1st Qu.:0.2995
## Median :0.23238 Median :-0.1873 Median :0.7167 Median :0.3167
## Mean :0.22909 Mean :-0.2150 Mean :0.7367 Mean :0.3189
## 3rd Qu.:0.25847 3rd Qu.: 0.0000 3rd Qu.:0.8551 3rd Qu.:0.3325
## Max. :0.37722 Max. : 0.2734 Max. :1.7907 Max. :0.5171
## xs_05 xs_06 customer
## Min. :0.00000 Min. :0.2847 Length:35
## 1st Qu.:0.01384 1st Qu.:0.5446 Class :character
## Median :0.06259 Median :0.6837 Mode :character
## Mean :0.08784 Mean :0.6986
## 3rd Qu.:0.10509 3rd Qu.:0.8342
## Max. :0.44002 Max. :1.1698
##
## $G
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.3612 Min. :-0.3612 Min. :-0.3612 Min. :0.0000
## 1st Qu.: 0.1371 1st Qu.: 0.0000 1st Qu.: 0.1578 1st Qu.:0.2345
## Median : 0.2481 Median : 0.1218 Median : 0.3191 Median :0.2906
## Mean : 0.2211 Mean : 0.1274 Mean : 0.3285 Mean :0.3094
## 3rd Qu.: 0.3160 3rd Qu.: 0.2598 3rd Qu.: 0.4415 3rd Qu.:0.3624
## Max. : 0.5770 Max. : 0.4924 Max. : 1.2814 Max. :0.7498
## xs_05 xs_06 customer
## Min. :0.0000 Min. :0.0000 Length:113
## 1st Qu.:0.1449 1st Qu.:0.2749 Class :character
## Median :0.2228 Median :0.3810 Mode :character
## Mean :0.2428 Mean :0.3872
## 3rd Qu.:0.3200 3rd Qu.:0.4748
## Max. :0.6043 Max. :1.2703
##
## $K
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1929 Min. :-0.45588 Min. :-0.1929 Min. :0.02511
## 1st Qu.: 0.1099 1st Qu.: 0.00000 1st Qu.: 0.1874 1st Qu.:0.23880
## Median : 0.2545 Median : 0.10806 Median : 0.3628 Median :0.29826
## Mean : 0.2198 Mean : 0.09952 Mean : 0.3385 Mean :0.30084
## 3rd Qu.: 0.3143 3rd Qu.: 0.21411 3rd Qu.: 0.4946 3rd Qu.:0.37644
## Max. : 0.5294 Max. : 0.52943 Max. : 0.7334 Max. :0.59517
## xs_05 xs_06 customer
## Min. :0.02511 Min. :0.02511 Length:38
## 1st Qu.:0.10866 1st Qu.:0.28099 Class :character
## Median :0.21802 Median :0.36633 Mode :character
## Mean :0.23749 Mean :0.39190
## 3rd Qu.:0.31286 3rd Qu.:0.50790
## Max. :0.59517 Max. :0.87500
##
## $M
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1119 Min. :-0.2357830 Min. :-0.1119 Min. :0.01455
## 1st Qu.: 0.1298 1st Qu.: 0.0005822 1st Qu.: 0.1843 1st Qu.:0.23238
## Median : 0.2209 Median : 0.1301042 Median : 0.2947 Median :0.27622
## Mean : 0.2341 Mean : 0.1373717 Mean : 0.3271 Mean :0.30171
## 3rd Qu.: 0.2986 3rd Qu.: 0.2610634 3rd Qu.: 0.4167 3rd Qu.:0.35778
## Max. : 0.7548 Max. : 0.6910501 Max. : 1.1690 Max. :0.67638
## xs_05 xs_06 customer
## Min. :0.01455 Min. :0.01455 Length:71
## 1st Qu.:0.13840 1st Qu.:0.24491 Class :character
## Median :0.19951 Median :0.38587 Mode :character
## Mean :0.23664 Mean :0.38138
## 3rd Qu.:0.31684 3rd Qu.:0.48507
## Max. :0.67638 Max. :0.90264
##
## $Other
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.08179 Min. :-0.58926 Min. :-0.05311 Min. :0.04405
## 1st Qu.: 0.14678 1st Qu.:-0.18671 1st Qu.: 0.26019 1st Qu.:0.24536
## Median : 0.20141 Median :-0.01860 Median : 0.41334 Median :0.28067
## Mean : 0.20129 Mean :-0.02167 Mean : 0.44045 Mean :0.28703
## 3rd Qu.: 0.24808 3rd Qu.: 0.14493 3rd Qu.: 0.61393 3rd Qu.:0.32161
## Max. : 0.67685 Max. : 0.67685 Max. : 1.44532 Max. :0.61849
## xs_05 xs_06 customer
## Min. :0.00000 Min. :0.04405 Length:245
## 1st Qu.:0.06906 1st Qu.:0.33333 Class :character
## Median :0.12681 Median :0.46359 Mode :character
## Mean :0.15362 Mean :0.47928
## 3rd Qu.:0.20775 3rd Qu.:0.60838
## Max. :0.56237 Max. :1.14684
##
## $Q
## xs_01 xs_02 xs_03 xs_04
## Min. :-0.1789 Min. :-0.37219 Min. :-0.1789 Min. :0.1014
## 1st Qu.: 0.1369 1st Qu.:-0.01576 1st Qu.: 0.2241 1st Qu.:0.2392
## Median : 0.2275 Median : 0.09962 Median : 0.3267 Median :0.2934
## Mean : 0.2243 Mean : 0.08974 Mean : 0.3623 Mean :0.3324
## 3rd Qu.: 0.3195 3rd Qu.: 0.19656 3rd Qu.: 0.5145 3rd Qu.:0.3504
## Max. : 0.5247 Max. : 0.52468 Max. : 1.0159 Max. :0.8988
## xs_05 xs_06 customer
## Min. :0.02799 Min. :0.1014 Length:36
## 1st Qu.:0.12687 1st Qu.:0.3115 Class :character
## Median :0.21997 Median :0.4006 Mode :character
## Mean :0.24190 Mean :0.4428
## 3rd Qu.:0.27744 3rd Qu.:0.5609
## Max. :0.89883 Max. :0.8988
There is no obvious difference in the distributions of the continuous variables across outcome, region, and customer.
# Correlation plots of the continuous inputs: one plot per input group
# (b, n, a, w, s), followed by one plot of all groups together.
# b, n, a, w and s are defined outside this section (presumably vectors of
# column names for each input prefix -- confirm). all_of() marks them as
# external vectors so they cannot be confused with data columns of the same
# name.
list(b, n, a, w, s, c(b, n, a, w, s)) %>%
  walk(function(cols) {
    df_all %>%
      select(all_of(cols)) %>%
      cor() %>%
      corrplot::corrplot(type = "upper")
  })
Based on the figures above,
Most pairs of inputs whose names start with “xb_” are only mildly correlated, while a few pairs are moderately correlated (correlation above 0.5 and below 0.8). The moderately correlated pairs are: {xb03,xb05},{xb04,xb08}; Most pairs of inputs whose names start with “xn_” are only mildly correlated, while a few pairs are moderately correlated (correlation above 0.5 and below 0.8). The moderately correlated pairs are: {xn03,xn05},{xn04,xn08};
Among the inputs whose names start with “xw_”, the moderately correlated pair is: {xw03,xw01};
All correlations between inputs whose names start with “xs_” are below 0.5, and the correlation of some pairs is close to 0: {xs04,xs02},{xs04,xs03},{xs01,xs05},{xs01,xs06}
In general, most pairs of inputs have low correlation, while some pairs are moderately correlated, especially when their names share the same prefix.
# Correlation plot of every continuous input together with the
# log-transformed response.
# b, n, a, w and s are defined outside this section (presumably vectors of
# column names -- confirm); all_of() marks them as external vectors so they
# cannot be confused with data columns of the same name.
# NOTE(review): despite its name, the assigned object is whatever
# corrplot::corrplot() returns, not a data frame containing log_response --
# confirm that nothing downstream expects a data frame here.
df_all_include_log_response <- df_all %>%
  mutate(log_response = log(response)) %>%
  select(
    all_of(b), all_of(n), all_of(a), all_of(w), all_of(s),
    log_response
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper")
# Correlation plot of every continuous input together with the raw response.
# b, n, a, w and s are defined outside this section (presumably vectors of
# column names -- confirm); all_of() marks them as external vectors so they
# cannot be confused with data columns of the same name.
df_all %>%
  select(
    all_of(b), all_of(n), all_of(a), all_of(w), all_of(s),
    response
  ) %>%
  cor() %>%
  corrplot::corrplot(type = "upper")
# Response plotted against region: one column of points per region level.
ggplot(
  data = select(df_all, region, response),
  mapping = aes(x = region, y = response)
) +
  geom_point()

# Response plotted against customer: one column of points per customer level.
ggplot(
  data = select(df_all, customer, response),
  mapping = aes(x = customer, y = response)
) +
  geom_point()
Both outputs (the response and the log-transformed response) are only weakly correlated with the inputs;
I can’t see any trend between the outputs and inputs;
Besides, the categorical inputs also have a weak relationship with the outputs.
# Reshape all continuous inputs into long format, keyed by the binary outcome,
# so that each input can be compared across outcome levels in one figure.
# b, n, a, w and s are defined outside this section (presumably vectors of
# column names -- confirm); all_of() marks them as external vectors so they
# cannot be confused with data columns of the same name.
X_outcome <- df_all %>%
  select(
    all_of(b), all_of(n), all_of(a), all_of(w), all_of(s),
    outcome
  ) %>%
  melt(
    id.vars = "outcome",
    variable.name = "Xs_Variables",
    value.name = "Xs_Distribution"
  )

# One group of boxes per input, one box per outcome level within each group.
X_outcome %>%
  ggplot(mapping = aes(x = Xs_Variables, y = Xs_Distribution)) +
  geom_boxplot(mapping = aes(fill = outcome))